Commit d01cebe1 authored by Romain Courteaud

ParserStorage: extract data from RSS/OPML.

Thanks to Alain Takoudjou <alain.takoudjou@nexedi.com> for his original work on this topic.
parent c1e8827f
...@@ -169,6 +169,7 @@ module.exports = function (grunt) { ...@@ -169,6 +169,7 @@ module.exports = function (grunt) {
'src/jio.storage/uuidstorage.js', 'src/jio.storage/uuidstorage.js',
'src/jio.storage/memorystorage.js', 'src/jio.storage/memorystorage.js',
'src/jio.storage/zipstorage.js', 'src/jio.storage/zipstorage.js',
'src/jio.storage/parserstorage.js',
'src/jio.storage/httpstorage.js', 'src/jio.storage/httpstorage.js',
'src/jio.storage/dropboxstorage.js', 'src/jio.storage/dropboxstorage.js',
'src/jio.storage/davstorage.js', 'src/jio.storage/davstorage.js',
......
/*jslint nomen: true*/
/*global jIO, DOMParser */
(function (jIO, DOMParser) {
"use strict";
/////////////////////////////////////////////////////////////
// OPML Parser
/////////////////////////////////////////////////////////////
/**
 * Wrap an OPML text so that its head and outlines can be listed as
 * documents.
 *
 * @param {String} txt OPML source, parsed as 'text/xml'
 */
function OPMLParser(txt) {
  this._dom_parser = new DOMParser().parseFromString(txt, 'text/xml');
}

/**
 * Convert the element children of "opml > head" into a dictionary.
 *
 * @return {Object} tag name -> text content; an empty object when the
 *   document has no head element
 */
OPMLParser.prototype.parseHead = function () {
  var head_element = this._dom_parser.querySelector("opml > head"),
    tag_element,
    i,
    result = {};
  if (!head_element) {
    // No head element: report an empty document instead of a TypeError
    return result;
  }
  // Walking backward means the first of two same-named tags wins
  for (i = head_element.childNodes.length - 1; i >= 0; i -= 1) {
    tag_element = head_element.childNodes[i];
    // Keep element nodes only (nodeType 1): text and comment nodes have no
    // tagName and would otherwise be stored under an "undefined" key
    if (tag_element.nodeType === 1) {
      result[tag_element.tagName] = tag_element.textContent;
    }
  }
  return result;
};

/**
 * Append one outline, then recursively its nested outlines, to result_list.
 *
 * @param {Array} result_list Accumulator receiving {id, value[, doc]} rows
 * @param {Element} outline_element The outline element to convert
 * @param {String} prefix Identifier given to outline_element
 * @param {Boolean} include When truthy, attach the attributes as `doc`
 * @param {String} [id] When defined, only the outline with this identifier
 *   is appended; the whole subtree is still visited
 */
OPMLParser.prototype.parseOutline = function (result_list, outline_element,
                                              prefix, include, id) {
  var attribute,
    i,
    child,
    result = {};
  if ((id === prefix) || (id === undefined)) {
    result_list.push({
      id: prefix,
      value: {}
    });
    if (include) {
      for (i = outline_element.attributes.length - 1; i >= 0; i -= 1) {
        attribute = outline_element.attributes[i];
        // Attributes with an empty value are left out of the document
        if (attribute.value) {
          result[attribute.name] = attribute.value;
        }
      }
      result_list[result_list.length - 1].doc = result;
    }
  }
  // Children are visited last to first, and a child identifier is built
  // from its position in childNodes (which counts every node type)
  for (i = outline_element.childNodes.length - 1; i >= 0; i -= 1) {
    child = outline_element.childNodes[i];
    if (child.tagName === 'outline') {
      this.parseOutline(result_list, child, prefix + '/' + i, include, id);
    }
  }
};

/**
 * List the documents found in the OPML text: '/0' is the head, '/1/<n>' the
 * n-th "body > outline" element, and deeper paths its nested outlines.
 *
 * @param {Boolean} include When truthy, each row carries its `doc`
 * @param {String} [id] Restrict the result to this identifier
 * @return {Array} Rows shaped as {id, value[, doc]}
 */
OPMLParser.prototype.getDocumentList = function (include, id) {
  var result_list,
    item_list = this._dom_parser.querySelectorAll("body > outline"),
    i;
  if ((id === '/0') || (id === undefined)) {
    result_list = [{
      id: '/0',
      value: {}
    }];
    if (include) {
      result_list[0].doc = this.parseHead();
    }
  } else {
    result_list = [];
  }
  for (i = 0; i < item_list.length; i += 1) {
    this.parseOutline(result_list, item_list[i], '/1/' + i, include, id);
  }
  return result_list;
};
/////////////////////////////////////////////////////////////
// RSS Parser
/////////////////////////////////////////////////////////////
/**
 * Wrap an RSS text so that its channel and items can be listed as
 * documents.
 *
 * @param {String} txt RSS source, parsed as 'text/xml'
 */
function RSSParser(txt) {
  this._dom_parser = new DOMParser().parseFromString(txt, 'text/xml');
}

/**
 * Flatten the element children of a channel or item into a dictionary.
 *
 * Each child contributes `<tag>` -> text content and, for every attribute
 * with a non-empty value, `<tag>_<attribute>` -> attribute value. Nested
 * `item` elements are skipped: they are exposed as separate documents.
 *
 * @param {Element} element The channel or item element (may be null)
 * @return {Object} The flattened document; empty when element is missing
 */
RSSParser.prototype.parseElement = function (element) {
  var tag_element,
    i,
    j,
    attribute,
    result = {};
  if (!element) {
    // querySelector found nothing (for example no channel in the feed)
    return result;
  }
  // Walking backward means the first of two same-named tags wins
  for (i = element.childNodes.length - 1; i >= 0; i -= 1) {
    tag_element = element.childNodes[i];
    // Keep element nodes only (nodeType 1): the whitespace text nodes of an
    // indented feed have neither tagName nor attributes
    if ((tag_element.nodeType === 1) && (tag_element.tagName !== 'item')) {
      result[tag_element.tagName] = tag_element.textContent;
      for (j = tag_element.attributes.length - 1; j >= 0; j -= 1) {
        attribute = tag_element.attributes[j];
        if (attribute.value) {
          result[tag_element.tagName + '_' + attribute.name] =
            attribute.value;
        }
      }
    }
  }
  return result;
};

/**
 * List the documents found in the RSS text: '/0' is the channel and
 * '/0/<n>' the n-th "rss > channel > item" element.
 *
 * @param {Boolean} include When truthy, each row carries its `doc`
 * @param {String} [id] Restrict the result to this identifier
 * @return {Array} Rows shaped as {id, value[, doc]}
 */
RSSParser.prototype.getDocumentList = function (include, id) {
  var result_list,
    item_list = this._dom_parser.querySelectorAll("rss > channel > item"),
    i;
  if ((id === '/0') || (id === undefined)) {
    result_list = [{
      id: '/0',
      value: {}
    }];
    if (include) {
      result_list[0].doc = this.parseElement(
        this._dom_parser.querySelector("rss > channel")
      );
    }
  } else {
    result_list = [];
  }
  for (i = 0; i < item_list.length; i += 1) {
    if ((id === '/0/' + i) || (id === undefined)) {
      result_list.push({
        id: '/0/' + i,
        value: {}
      });
      if (include) {
        result_list[result_list.length - 1].doc =
          this.parseElement(item_list[i]);
      }
    }
  }
  return result_list;
};
/////////////////////////////////////////////////////////////
// Helpers
/////////////////////////////////////////////////////////////
// Parser constructors, keyed by the value accepted in the storage `parser`
// option
var parser_dict = {
  rss: RSSParser,
  opml: OPMLParser
};
/**
 * Fetch the configured attachment as text from the sub storage and wrap it
 * in the parser selected by the storage `parser` option.
 *
 * @param {Object} storage A ParserStorage instance
 * @return The value returned by `.push()` on the sub storage result
 *   (presumably an RSVP queue resolving to the parser - confirm against
 *   jIO); the callback throws a jIOError (status 501) when the parser name
 *   is not registered in parser_dict
 */
function getParser(storage) {
  return storage._sub_storage.getAttachment(storage._document_id,
                                            storage._attachment_id,
                                            {format: 'text'})
    .push(function (txt) {
      var parser_name = storage._parser_name;
      // Own-property check: a plain lookup would also accept inherited
      // keys such as 'constructor', and an unknown name would only surface
      // as an opaque "is not a constructor" TypeError
      if (!Object.prototype.hasOwnProperty.call(parser_dict, parser_name)) {
        throw new jIO.util.jIOError(
          "Unknown parser: " + parser_name,
          501
        );
      }
      return new parser_dict[parser_name](txt);
    });
}
/////////////////////////////////////////////////////////////
// Storage
/////////////////////////////////////////////////////////////
/**
 * Read-only storage exposing the content of one sub storage attachment as
 * a set of documents.
 *
 * @param {Object} spec Storage description
 * @param {String} spec.document_id Sub storage document holding the file
 * @param {String} spec.attachment_id Attachment holding the text to parse
 * @param {String} spec.parser Name of the parser to use
 * @param {Object} spec.sub_storage Description of the sub storage
 */
function ParserStorage(spec) {
  this._document_id = spec.document_id;
  this._attachment_id = spec.attachment_id;
  this._parser_name = spec.parser;
  this._sub_storage = jIO.createJIO(spec.sub_storage);
}

/**
 * Only listing documents, with or without their content, is supported.
 */
ParserStorage.prototype.hasCapacity = function (capacity) {
  return ['list', 'include'].indexOf(capacity) !== -1;
};

/**
 * List every parsed document, with its content when
 * `options.include_docs` is truthy.
 */
ParserStorage.prototype.buildQuery = function (options) {
  var query_options = (options === undefined) ? {} : options;

  function listDocuments(parser) {
    var include_docs = query_options.include_docs || false;
    return parser.getDocumentList(include_docs);
  }

  return getParser(this).push(listDocuments);
};

/**
 * Return the content of the parsed document `id`, or fail with a 404
 * jIOError when the parser does not know this identifier.
 */
ParserStorage.prototype.get = function (id) {
  function extractDocument(parser) {
    var match_list = parser.getDocumentList(true, id);
    if (!match_list.length) {
      throw new jIO.util.jIOError(
        "Cannot find parsed document: " + id,
        404
      );
    }
    return match_list[0].doc;
  }

  return getParser(this).push(extractDocument);
};
// Register ParserStorage under the 'parser' storage type name
jIO.addStorage('parser', ParserStorage);
}(jIO, DOMParser));
\ No newline at end of file
/*jslint nomen: true */
/*global jIO, QUnit, Blob*/
(function (jIO, QUnit, Blob) {
"use strict";
// Local shortcuts to the QUnit functions used by the tests below
var deepEqual = QUnit.deepEqual,
  equal = QUnit.equal,
  expect = QUnit.expect,
  module = QUnit.module,
  ok = QUnit.ok,
  start = QUnit.start,
  stop = QUnit.stop,
  test = QUnit.test;
/////////////////////////////////////////////////////////////////
// Custom RSS test substorage definition
/////////////////////////////////////////////////////////////////
/**
 * Stub sub storage whose only attachment is a fixed RSS feed made of one
 * channel and two items.
 */
function RSSStorage200() {
  return this;
}

/**
 * Check that document 'foo' / attachment 'bar' is requested, then return
 * the sample feed wrapped in a Blob.
 */
RSSStorage200.prototype.getAttachment = function (id, name) {
  var feed_part_list = [
    '<?xml version="1.0" encoding="UTF-8" ?>',
    '<rss version="2.0">',
    '<channel>',
    '<title>RSS Example</title>',
    '<description>This is an example of an RSS feed</description>',
    '<link>http://www.domain.com/link.htm</link>',
    '<lastBuildDate>Mon, 28 Aug 2006 11:12:55 -0400 </lastBuildDate>',
    '<pubDate>Tue, 29 Aug 2006 09:00:00 -0400</pubDate>',
    '<item>',
    '<title>Item Example</title>',
    '<description>This is an example of an Item</description>',
    '<link>http://www.domain.com/link.htm</link>',
    '<guid isPermaLink="false">1102345</guid>',
    '<pubDate>Tue, 29 Aug 2006 09:00:00 -0400</pubDate>',
    '</item>',
    '<item>',
    '<title>Item Example</title>',
    '<description>This is another example of an Item</description>',
    '<link>http://www.domain.com/link2.htm</link>',
    '<guid isPermaLink="false">11023-258</guid>',
    '<pubDate>Tue, 29 Aug 2006 09:00:00 -0400</pubDate>',
    '</item>',
    '</channel>',
    '</rss>'
  ];
  equal(id, 'foo');
  equal(name, 'bar');
  return new Blob([feed_part_list.join('')]);
};
// Make the RSS stub available as the 'rssstorage200' sub storage type
jIO.addStorage('rssstorage200', RSSStorage200);
/////////////////////////////////////////////////////////////////
// Custom OPML test substorage definition
/////////////////////////////////////////////////////////////////
/**
 * Stub sub storage whose only attachment is a fixed OPML document made of
 * a head and three top-level outlines holding two links each.
 */
function OPMLStorage200() {
  return this;
}

/**
 * Check that document 'foo' / attachment 'bar' is requested, then return
 * the sample OPML wrapped in a Blob.
 */
OPMLStorage200.prototype.getAttachment = function (id, name) {
  // The "23:35:52GMT" value (no space) of "Syndication News" is mirrored
  // by the expected documents of the tests
  var opml_part_list = [
    '<?xml version="1.0" encoding="ISO-8859-1"?>',
    '<opml version="1.0">',
    '<head>',
    '<title>feedOnFeeds.xml</title>',
    '<dateCreated>Thu, 12 Sep 2003 23:35:52 GMT</dateCreated>',
    '<dateModified>Fri, 12 Sep 2003 23:45:37 GMT</dateModified>',
    '<ownerName>SomeUser</ownerName>',
    '<ownerEmail>newsfor@example.com</ownerEmail>',
    '<link>http://opml.example.com/opml.xml</link>',
    '</head>',
    '<body>',
    '<outline text="Sample OPML">',
    '<outline text="Mobile News" type="link" url="http://opml.example.',
    'com/feeds/mobile.xml" dateCreated="Thu, 12 Sep 2003 23:35:52 GMT"/>',
    '<outline text="Syndication News" type="link" url="http://opml.examp',
    'le.com/feeds/syndication.xml" dateCreated="Thu, 12 Sep 2003 23:35:52',
    'GMT"/>',
    '</outline>',
    '<outline text="World News">',
    '<outline text="Politics" type="link" url="http://opml.example.com/',
    'feeds/politics.xml" dateCreated="Thu, 12 Sep 2003 23:35:52 GMT"/>',
    '<outline text="Sports" type="link" url="http://opml.example.com/fee',
    'ds/sports.xml" dateCreated="Thu, 12 Sep 2003 23:35:52 GMT"/>',
    '</outline>',
    '<outline text="Various">',
    '<outline text="Weather" type="link" url="http://opml.example.com/fe',
    'eds/weather.xml" dateCreated="Thu, 12 Sep 2003 23:35:52 GMT"/>',
    '<outline text="Entertainment" type="link" url="http://opml.example.',
    'com/feeds/ent.xml" dateCreated="Thu, 12 Sep 2003 23:35:52 GMT"/>',
    '</outline>',
    '</body>',
    '</opml>'
  ];
  equal(id, 'foo');
  equal(name, 'bar');
  return new Blob([opml_part_list.join('')]);
};
// Make the OPML stub available as the 'opmlstorage200' sub storage type
jIO.addStorage('opmlstorage200', OPMLStorage200);
/////////////////////////////////////////////////////////////////
// Constructor
/////////////////////////////////////////////////////////////////
module("ParserStorage.constructor");
// The constructor must copy its spec onto the storage, even for a parser
// name that matches no known parser, and must not set any `_parser`
test("Storage stores parameters", function () {
  var storage = jIO.createJIO({
    type: 'parser',
    document_id: 'fooname',
    attachment_id: 'barname',
    parser: 'fooparser',
    sub_storage: {
      type: 'memory'
    }
  }).__storage;

  equal(storage._sub_storage.__type, "memory");
  equal(storage._document_id, "fooname");
  equal(storage._attachment_id, "barname");
  equal(storage._parser_name, "fooparser");
  equal(storage._parser, undefined);
});
/////////////////////////////////////////////////////////////////
// ParserStorage.allDocs
/////////////////////////////////////////////////////////////////
module("ParserStorage.allDocs");
// Without include_docs, the channel and both items are listed by id only
test("get all IDs from RSS", function () {
  var expected_result = {
    data: {
      rows: [
        {id: "/0", value: {}},
        {id: "/0/0", value: {}},
        {id: "/0/1", value: {}}
      ],
      total_rows: 3
    }
  };

  this.jio = jIO.createJIO({
    type: 'parser',
    document_id: 'foo',
    attachment_id: 'bar',
    parser: 'rss',
    sub_storage: {type: 'rssstorage200'}
  });

  // 2 equal() checks in the stub getAttachment + the deepEqual below
  expect(3);
  stop();
  this.jio.allDocs()
    .then(function (all_docs) {
      deepEqual(all_docs, expected_result, "Check documents");
    })
    .fail(function (reason) {
      ok(false, reason);
    })
    .always(function () {
      start();
    });
});
// With include_docs, every row also carries the flattened channel or item
test("get all documents from RSS", function () {
  var channel_doc = {
      description: "This is an example of an RSS feed",
      lastBuildDate: "Mon, 28 Aug 2006 11:12:55 -0400 ",
      link: "http://www.domain.com/link.htm",
      pubDate: "Tue, 29 Aug 2006 09:00:00 -0400",
      title: "RSS Example"
    },
    first_item_doc = {
      description: "This is an example of an Item",
      guid: "1102345",
      guid_isPermaLink: "false",
      link: "http://www.domain.com/link.htm",
      pubDate: "Tue, 29 Aug 2006 09:00:00 -0400",
      title: "Item Example"
    },
    second_item_doc = {
      description: "This is another example of an Item",
      guid: "11023-258",
      guid_isPermaLink: "false",
      link: "http://www.domain.com/link2.htm",
      pubDate: "Tue, 29 Aug 2006 09:00:00 -0400",
      title: "Item Example"
    },
    expected_result = {
      data: {
        rows: [
          {doc: channel_doc, id: "/0", value: {}},
          {doc: first_item_doc, id: "/0/0", value: {}},
          {doc: second_item_doc, id: "/0/1", value: {}}
        ],
        total_rows: 3
      }
    };

  this.jio = jIO.createJIO({
    type: 'parser',
    document_id: 'foo',
    attachment_id: 'bar',
    parser: 'rss',
    sub_storage: {type: 'rssstorage200'}
  });

  // 2 equal() checks in the stub getAttachment + the deepEqual below
  expect(3);
  stop();
  this.jio.allDocs({include_docs: true})
    .then(function (all_docs) {
      deepEqual(all_docs, expected_result, "Check documents");
    })
    .fail(function (reason) {
      ok(false, reason);
    })
    .always(function () {
      start();
    });
});
// Without include_docs, the head and every outline are listed by id only;
// nested outlines come out last child first
test("get all IDs from OPML", function () {
  var expected_result = {
    data: {
      rows: [
        {id: "/0", value: {}},
        {id: "/1/0", value: {}},
        {id: "/1/0/1", value: {}},
        {id: "/1/0/0", value: {}},
        {id: "/1/1", value: {}},
        {id: "/1/1/1", value: {}},
        {id: "/1/1/0", value: {}},
        {id: "/1/2", value: {}},
        {id: "/1/2/1", value: {}},
        {id: "/1/2/0", value: {}}
      ],
      total_rows: 10
    }
  };

  this.jio = jIO.createJIO({
    type: 'parser',
    document_id: 'foo',
    attachment_id: 'bar',
    parser: 'opml',
    sub_storage: {type: 'opmlstorage200'}
  });

  // 2 equal() checks in the stub getAttachment + the deepEqual below
  expect(3);
  stop();
  this.jio.allDocs()
    .then(function (all_docs) {
      deepEqual(all_docs, expected_result, "Check documents");
    })
    .fail(function (reason) {
      ok(false, reason);
    })
    .always(function () {
      start();
    });
});
// With include_docs, the head row carries the head tags and each outline
// row carries the attributes of its outline element
test("get all documents from OPML", function () {
  var expected_row_list = [
      {
        doc: {
          dateCreated: "Thu, 12 Sep 2003 23:35:52 GMT",
          dateModified: "Fri, 12 Sep 2003 23:45:37 GMT",
          link: "http://opml.example.com/opml.xml",
          ownerEmail: "newsfor@example.com",
          ownerName: "SomeUser",
          title: "feedOnFeeds.xml"
        },
        id: "/0",
        value: {}
      },
      {
        doc: {
          text: "Sample OPML"
        },
        id: "/1/0",
        value: {}
      },
      {
        doc: {
          dateCreated: "Thu, 12 Sep 2003 23:35:52GMT",
          text: "Syndication News",
          type: "link",
          url: "http://opml.example.com/feeds/syndication.xml"
        },
        id: "/1/0/1",
        value: {}
      },
      {
        doc: {
          dateCreated: "Thu, 12 Sep 2003 23:35:52 GMT",
          text: "Mobile News",
          type: "link",
          url: "http://opml.example.com/feeds/mobile.xml"
        },
        id: "/1/0/0",
        value: {}
      },
      {
        doc: {
          text: "World News"
        },
        id: "/1/1",
        value: {}
      },
      {
        doc: {
          dateCreated: "Thu, 12 Sep 2003 23:35:52 GMT",
          text: "Sports",
          type: "link",
          url: "http://opml.example.com/feeds/sports.xml"
        },
        id: "/1/1/1",
        value: {}
      },
      {
        doc: {
          dateCreated: "Thu, 12 Sep 2003 23:35:52 GMT",
          text: "Politics",
          type: "link",
          url: "http://opml.example.com/feeds/politics.xml"
        },
        id: "/1/1/0",
        value: {}
      },
      {
        doc: {
          text: "Various"
        },
        id: "/1/2",
        value: {}
      },
      {
        doc: {
          dateCreated: "Thu, 12 Sep 2003 23:35:52 GMT",
          text: "Entertainment",
          type: "link",
          url: "http://opml.example.com/feeds/ent.xml"
        },
        id: "/1/2/1",
        value: {}
      },
      {
        doc: {
          dateCreated: "Thu, 12 Sep 2003 23:35:52 GMT",
          text: "Weather",
          type: "link",
          url: "http://opml.example.com/feeds/weather.xml"
        },
        id: "/1/2/0",
        value: {}
      }
    ],
    expected_result = {
      data: {
        rows: expected_row_list,
        total_rows: 10
      }
    };

  this.jio = jIO.createJIO({
    type: 'parser',
    document_id: 'foo',
    attachment_id: 'bar',
    parser: 'opml',
    sub_storage: {type: 'opmlstorage200'}
  });

  // 2 equal() checks in the stub getAttachment + the deepEqual below
  expect(3);
  stop();
  this.jio.allDocs({include_docs: true})
    .then(function (all_docs) {
      deepEqual(all_docs, expected_result, "Check documents");
    })
    .fail(function (reason) {
      ok(false, reason);
    })
    .always(function () {
      start();
    });
});
/////////////////////////////////////////////////////////////////
// ParserStorage.get
/////////////////////////////////////////////////////////////////
module("ParserStorage.get");
// '/0' is the channel itself: its tags, without the nested items
test("get RSS channel", function () {
  var expected_doc = {
    description: "This is an example of an RSS feed",
    lastBuildDate: "Mon, 28 Aug 2006 11:12:55 -0400 ",
    link: "http://www.domain.com/link.htm",
    pubDate: "Tue, 29 Aug 2006 09:00:00 -0400",
    title: "RSS Example"
  };

  this.jio = jIO.createJIO({
    type: 'parser',
    document_id: 'foo',
    attachment_id: 'bar',
    parser: 'rss',
    sub_storage: {type: 'rssstorage200'}
  });

  // 2 equal() checks in the stub getAttachment + the deepEqual below
  expect(3);
  stop();
  this.jio.get('/0')
    .then(function (doc) {
      deepEqual(doc, expected_doc, "Check document");
    })
    .fail(function (reason) {
      ok(false, reason);
    })
    .always(function () {
      start();
    });
});
// '/0/1' is the second item of the channel
test("get RSS item", function () {
  var expected_doc = {
    description: "This is another example of an Item",
    guid: "11023-258",
    guid_isPermaLink: "false",
    link: "http://www.domain.com/link2.htm",
    pubDate: "Tue, 29 Aug 2006 09:00:00 -0400",
    title: "Item Example"
  };

  this.jio = jIO.createJIO({
    type: 'parser',
    document_id: 'foo',
    attachment_id: 'bar',
    parser: 'rss',
    sub_storage: {type: 'rssstorage200'}
  });

  // 2 equal() checks in the stub getAttachment + the deepEqual below
  expect(3);
  stop();
  this.jio.get('/0/1')
    .then(function (doc) {
      deepEqual(doc, expected_doc, "Check document");
    })
    .fail(function (reason) {
      ok(false, reason);
    })
    .always(function () {
      start();
    });
});
// An identifier matching neither the channel nor an item must fail with a
// 404 jIOError
test("get unknown RSS item", function () {
  this.jio = jIO.createJIO({
    type: 'parser',
    document_id: 'foo',
    attachment_id: 'bar',
    parser: 'rss',
    sub_storage: {type: 'rssstorage200'}
  });

  // 2 equal() checks in the stub getAttachment + the 3 error checks below
  expect(5);
  stop();
  this.jio.get('foo')
    .then(function (doc) {
      ok(false, doc);
    })
    .fail(function (reason) {
      ok(reason instanceof jIO.util.jIOError, reason);
      equal(reason.message, "Cannot find parsed document: foo");
      equal(reason.status_code, 404);
    })
    .always(function () {
      start();
    });
});
// '/0' is the OPML head: one property per head tag
test("get OPML head", function () {
  var expected_doc = {
    dateCreated: "Thu, 12 Sep 2003 23:35:52 GMT",
    dateModified: "Fri, 12 Sep 2003 23:45:37 GMT",
    link: "http://opml.example.com/opml.xml",
    ownerEmail: "newsfor@example.com",
    ownerName: "SomeUser",
    title: "feedOnFeeds.xml"
  };

  this.jio = jIO.createJIO({
    type: 'parser',
    document_id: 'foo',
    attachment_id: 'bar',
    parser: 'opml',
    sub_storage: {type: 'opmlstorage200'}
  });

  // 2 equal() checks in the stub getAttachment + the deepEqual below
  expect(3);
  stop();
  this.jio.get('/0')
    .then(function (doc) {
      deepEqual(doc, expected_doc, "Check document");
    })
    .fail(function (reason) {
      ok(false, reason);
    })
    .always(function () {
      start();
    });
});
// '/1/0/1' is the second link nested in the first top-level outline
test("get OPML outline", function () {
  var expected_doc = {
    dateCreated: "Thu, 12 Sep 2003 23:35:52GMT",
    text: "Syndication News",
    type: "link",
    url: "http://opml.example.com/feeds/syndication.xml"
  };

  this.jio = jIO.createJIO({
    type: 'parser',
    document_id: 'foo',
    attachment_id: 'bar',
    parser: 'opml',
    sub_storage: {type: 'opmlstorage200'}
  });

  // 2 equal() checks in the stub getAttachment + the deepEqual below
  expect(3);
  stop();
  this.jio.get('/1/0/1')
    .then(function (doc) {
      deepEqual(doc, expected_doc, "Check document");
    })
    .fail(function (reason) {
      ok(false, reason);
    })
    .always(function () {
      start();
    });
});
// An identifier matching neither the head nor an outline must fail with a
// 404 jIOError
test("get unknown OPML outline", function () {
  this.jio = jIO.createJIO({
    type: 'parser',
    document_id: 'foo',
    attachment_id: 'bar',
    parser: 'opml',
    sub_storage: {type: 'opmlstorage200'}
  });

  // 2 equal() checks in the stub getAttachment + the 3 error checks below
  expect(5);
  stop();
  this.jio.get('foo')
    .then(function (doc) {
      ok(false, doc);
    })
    .fail(function (reason) {
      ok(reason instanceof jIO.util.jIOError, reason);
      equal(reason.message, "Cannot find parsed document: foo");
      equal(reason.status_code, 404);
    })
    .always(function () {
      start();
    });
});
}(jIO, QUnit, Blob));
\ No newline at end of file
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
<script src="jio.storage/replicatestorage_fastrepair.tests.js"></script> <script src="jio.storage/replicatestorage_fastrepair.tests.js"></script>
<script src="jio.storage/replicatestorage_fastrepairattachment.tests.js"></script> <script src="jio.storage/replicatestorage_fastrepairattachment.tests.js"></script>
<script src="jio.storage/shastorage.tests.js"></script> <script src="jio.storage/shastorage.tests.js"></script>
<script src="jio.storage/parserstorage.tests.js"></script>
<!--script src="jio.storage/qiniustorage.tests.js"></script--> <!--script src="jio.storage/qiniustorage.tests.js"></script-->
<!--script src="jio.storage/indexstorage.tests.js"></script--> <!--script src="jio.storage/indexstorage.tests.js"></script-->
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment