Commit 8d8318c4 authored by Alain Takoudjou's avatar Alain Takoudjou

Mynij Search: improve performance, build index now in background, load body...

Mynij Search: improve performance, build index now in background, loading of body text in results is deferred

Improve general search performance.
The index build now runs in the background, and a notification pops up when the build is finished.
When results are loaded, the body text is fetched in a separate task to speed up display.
parent a0672a72
......@@ -68,6 +68,10 @@
</head>
<body>
<div id = "gadget_parser" data-gadget-url="gadget_mynij_parser.html"
data-gadget-scope="parser" data-gadget-sandbox="public"></div>
<div id = "gadget_model" data-gadget-url="gadget_mynij_model.html"
data-gadget-scope="model" data-gadget-sandbox="public"></div>
<div class="jqm-navmenu-panel"></div>
</body>
</html>
\ No newline at end of file
......@@ -238,7 +238,7 @@
</item>
<item>
<key> <string>serial</string> </key>
<value> <string>987.16076.64465.22528</string> </value>
<value> <string>987.16121.1326.57685</string> </value>
</item>
<item>
<key> <string>state</string> </key>
......@@ -256,7 +256,7 @@
</tuple>
<state>
<tuple>
<float>1602515832.08</float>
<float>1603297250.59</float>
<string>UTC</string>
</tuple>
</state>
......
......@@ -87,6 +87,28 @@
.declareAcquiredMethod("getUrlParameter", "getUrlParameter")
.declareAcquiredMethod("getUrlForList", "getUrlForList")
/////////////////////////////////////////////////////////////////
// ready methods (Mynij update)
/////////////////////////////////////////////////////////////////
.ready(function () {
var gadget = this;
gadget.props = {};
return gadget.getDeclaredGadget("parser")
.push(function (result) {
gadget.props.parser_gadget = result;
return gadget.getDeclaredGadget("model");
})
.push(function (result) {
gadget.props.model_gadget = result;
});
})
.allowPublicAcquisition("add_to_index", function (args) {
return this.props.model_gadget.add_page(args[0], args[1]);
})
.allowPublicAcquisition("save_index", function (args) {
return this.props.model_gadget.save_index(args[0]);
})
/////////////////////////////////////////////////////////////////
// declared methods
/////////////////////////////////////////////////////////////////
......
......@@ -234,7 +234,7 @@
</item>
<item>
<key> <string>serial</string> </key>
<value> <string>987.16208.16000.56951</string> </value>
<value> <string>987.29116.51782.25139</string> </value>
</item>
<item>
<key> <string>state</string> </key>
......@@ -252,7 +252,7 @@
</tuple>
<state>
<tuple>
<float>1602523373.23</float>
<float>1603354044.58</float>
<string>UTC</string>
</tuple>
</state>
......
......@@ -109,7 +109,7 @@
return gadget.state.result_gadget.showResultBox();
})
.push(function () {
return gadget.search(search_text, limit)
return gadget.search(search_text, limit, index)
})
.push(function () {
return gadget.add_searx_results(search_text);
......@@ -138,8 +138,21 @@
return gadget.state.model_gadget.search(key, chosen_index, limit);
})
.push(function (result) {
var result_queue = new RSVP.Queue(),
i;
console.log("search done");
var i, promise_list = [];
function push_result(search_hit) {
  // Append three steps to the shared result queue for one search hit:
  // load the page from the model, hand it to the result gadget, and log
  // (without rethrowing) any failure so the following hits still run.
  function load_page() {
    return gadget.state.model_gadget.get_page(search_hit);
  }
  function show_page(page) {
    return gadget.state.result_gadget.addItem(page, key);
  }
  function log_failure(error) {
    console.log("Error while getting result: " + error);
  }
  result_queue
    .push(load_page)
    .push(show_page)
    .push(undefined, log_failure);
}
if (!result || result.length === 0) {
return gadget.state.result_gadget.addItem(
{
......@@ -149,20 +162,9 @@
"");
} else {
for (i = 0; i < result.length; i += 1) {
promise_list.push(gadget.state.model_gadget.get_page(result[i]));
push_result(result[i]);
}
return new RSVP.Queue()
.push(function () {
return RSVP.all(promise_list);
})
.push(function (result) {
var i, promise_list = [];
for (i = 0; i < result.length; i += 1) {
promise_list.push(gadget.state.result_gadget.addItem(
result[i],
key));
}
});
return result_queue;
}
});
})
......
......@@ -242,7 +242,7 @@
</item>
<item>
<key> <string>serial</string> </key>
<value> <string>987.27166.43455.58931</string> </value>
<value> <string>987.30048.36447.4505</string> </value>
</item>
<item>
<key> <string>state</string> </key>
......@@ -260,7 +260,7 @@
</tuple>
<state>
<tuple>
<float>1603199326.69</float>
<float>1603353786.26</float>
<string>UTC</string>
</tuple>
</state>
......
......@@ -143,12 +143,13 @@
// });
// })
.declareMethod("search", function (search_key, index_name) {
.declareMethod("search", function (search_key, index_name, limit) {
var gadget = this,
results = [];
return gadget.get_index(index_name)
.push(function (index) {
var limit = 100;
if (!limit)
limit = 1000;
console.log(index.info());
return index.search(search_key, limit);
})
......
......@@ -242,7 +242,7 @@
</item>
<item>
<key> <string>serial</string> </key>
<value> <string>987.21786.43715.37666</string> </value>
<value> <string>987.28828.3761.54801</string> </value>
</item>
<item>
<key> <string>state</string> </key>
......@@ -260,7 +260,7 @@
</tuple>
<state>
<tuple>
<float>1603108982.58</float>
<float>1603282834.35</float>
<string>UTC</string>
</tuple>
</state>
......
/*jslint nomen: true, indent: 2, maxerr: 3, maxlen: 80*/
/*global window, RSVP, rJS, jIO*/
(function(window, RSVP, rJS, jIO) {
(function (window, RSVP, rJS, jIO) {
"use strict";
var concurrent_requests = 3;
var proxies = [
var concurrent_requests = 3,
proxies = [
"https://softinst116265.host.vifib.net/erp5/ERP5Site_getHTTPResource",
"https://softinst116446.host.vifib.net/erp5/ERP5Site_getHTTPResource"
];
function get_random_id(){
return '_' + Math.random().toString(36).substr(2, 9);
];
function get_random_id() {
return '_' + Math.random().toString(36).substr(2, 9);
}
function showError(error_message){
console.log(error_message);
function showError(error_message) {
console.log(error_message);
}
rJS(window)
.declareAcquiredMethod("getSetting", "getSetting")
.declareAcquiredMethod("setSetting", "setSetting")
.declareAcquiredMethod("notifySubmitted", "notifySubmitted")
.declareMethod("preRenderDocument", function (parent_options) {
var gadget = this;
return gadget.jio_get(parent_options.jio_key)
.push(function (parent_document) {
return parent_document;
});
})
var gadget = this;
return gadget.jio_get(parent_options.jio_key)
.push(function (parent_document) {
return parent_document;
});
})
.declareAcquiredMethod("add_to_index", "add_to_index")
.declareAcquiredMethod("save_index", "save_index")
.declareMethod("parse_user_entry_list", function(list, index_name){
var gadget = this,
links = list.split("\n").filter(link => link.length > 0);
return gadget.build_index(links, index_name)
.push(function(result){
console.log("done building index");
return gadget.save_index(index_name);
})
.push(undefined, function (my_error) {showError(my_error)});
//declare service to build index in background
// Service that builds indexes in the background.
// It loops forever: read the 'build_list' setting, build the first entry if
// there is one (otherwise wait), then start over.
.declareService(function () {
  var gadget = this;
  // One iteration of the loop; re-invokes itself once the iteration settles.
  function build() {
    // Entry being built in this iteration (first element of 'build_list').
    var entry;
    return new RSVP.Queue()
      .push(function () {
        return gadget.getSetting('build_list');
      })
      .push(function (build_list) {
        var link_list;
        if (!build_list || !build_list.length)
          // wait 5s and check again if there is something to build
          return RSVP.delay(5000);
        else {
          entry = build_list[0];
          // entry.links is a newline-separated string; empty lines are dropped
          link_list = entry.links.split("\n").filter(l => l.length > 0);
          return gadget.build_index(link_list, entry.index_name)
            .push(function (result) {
              console.log("done building index");
              return gadget.save_index(entry.index_name)
                // NOTE(review): this `result` shadows the build_index result
                // above, so the count shown in the message is whatever
                // save_index resolves with -- confirm that is intended.
                .push(function (result) {
                  return gadget.notifySubmitted({
                    message: "Index '" + entry.index_name + "' built : " +
                      result + " item(s) added.",
                    status: "success"
                  });
                });
            })
            // Any failure of the build/save/notify steps above is logged and
            // reported; the chain then continues so the entry still gets
            // removed from the list below.
            .push(undefined, function (my_error) {
              showError(my_error);
              return gadget.notifySubmitted({
                message: "Index '" + entry.index_name + "' built failed!",
                status: "error"
              });
            })
            .push(function () {
              // Re-read the setting: it may have changed during the build.
              return gadget.getSetting('build_list');
            })
            .push(function (current) {
              // Only drop the head if it is still the entry just processed.
              if (current && current.length &&
                  current[0].id === entry.id) {
                current.shift();
                return gadget.setSetting('build_list', current);
              }
            });
        }
      })
      .push(function () {
        // Loop: look for the next entry to build.
        return build();
      });
  }
  return build();
})
.declareMethod("build_index", function(links, index_name){
var gadget = this, promise_list = [], i;
for (i=0; i<links.length; i+=1){
promise_list.push(gadget.get_and_handle(links[i], index_name));
}
return RSVP.all(promise_list);
/*.declareMethod("parse_user_entry_list", function (list, index_name) {
var gadget = this,
links = list.split("\n").filter(link => link.length > 0);
return gadget.build_index(links, index_name)
.push(function (result) {
console.log("done building index");
return gadget.save_index(index_name);
})
.push(undefined, function (my_error) {
showError(my_error);
});
})*/
.declareMethod("build_index", function (links, index_name) {
var gadget = this,
promise_list = [],
i;
for (i = 0; i < links.length; i += 1) {
promise_list.push(gadget.get_and_handle(links[i], index_name));
}
return RSVP.all(promise_list);
})
.declareMethod("get_and_handle", function(link, index_name){
var gadget = this, rng, proxy;
return new RSVP.Queue()
.push(function(){
rng = Math.floor(Math.random() * Math.floor(proxies.length));
proxy = proxies[rng];
return jIO.util.ajax({url : proxy + "?url=" + link});
.declareMethod("get_and_handle", function (link, index_name) {
var gadget = this,
rng,
proxy;
return new RSVP.Queue()
.push(function () {
rng = Math.floor(Math.random() * Math.floor(proxies.length));
proxy = proxies[rng];
return jIO.util.ajax({url : proxy + "?url=" + link});
})
.push(function(page){
if (link.includes("rss") || link.includes("atom")) {
return gadget.parse_rss(page.currentTarget.response, link, index_name);
} else if (link.includes("xml")) {
return gadget.parse_sitemap(page.currentTarget.response, link, index_name);
}
.push(function (page) {
if (link.includes("rss") || link.includes("atom")) {
return gadget.parse_rss(
page.currentTarget.response,
link,
index_name);
} else if (link.includes("xml")) {
return gadget.parse_sitemap(
page.currentTarget.response,
link,
index_name);
}
});
})
.declareMethod("parse_rss", function(rss_file, file_name, index_name){
var gadget = this,
parser = new RSSParser({defaultRSS: 2.0});
return new RSVP.Queue()
.push(function(){
return parser.parseString(rss_file);
})
.push(function(result){
var promise_list = [];
result.items = result.items.filter(item => item.title); //removes items without title
result.items.forEach(function(item){
if (!(item.link) || item.link.includes("https://localhost:"))
item.link = "no-link";
promise_list.push(gadget.add_to_index({
id : get_random_id(),
link : item.link,
title : item.title,
content : item.content
}, index_name));
});
return RSVP.all(promise_list);
});
.declareMethod("parse_rss", function (rss_file, file_name, index_name) {
var gadget = this,
parser = new RSSParser({defaultRSS: 2.0});
return new RSVP.Queue()
.push(function () {
return parser.parseString(rss_file);
})
.push(function (result) {
var promise_list = [];
//removes items without title
result.items = result.items.filter(item => item.title);
result.items.forEach(function (item) {
if (!item.link || item.link.includes("https://localhost:"))
item.link = "no-link";
promise_list.push(gadget.add_to_index({
id : get_random_id(),
link : item.link,
title : item.title,
content : item.content
}, index_name));
});
return RSVP.all(promise_list);
});
})
.declareMethod("parse_sitemap", function(links_file, file_name, index_name){
var gadget = this,
links = new DOMParser().parseFromString(links_file, "text/xml").getElementsByTagName("url"),
links_modified = [],
promise_list = [],
i,j;
if (links.length === 0) {
var links_file_split = links_file.split("\n");
for (i = 0; i<links_file_split.length; i+=1){
if (links_file_split[i].includes("https://")
|| links_file_split[i].includes("http://")){
links_modified[i] = [links_file_split[i]];
}
}
if (links_modified.length === 0) {
showError("no links found in sitemap file : " + file_name);
return null;
}
} else {
for (i=0; i<links.length; i+=1){
links_modified[i] = [links[i].getElementsByTagName('loc')[0].textContent];
}
.declareMethod("parse_sitemap",
function (links_file, file_name, index_name) {
var gadget = this,
links = new DOMParser().parseFromString(links_file, "text/xml")
.getElementsByTagName("url"),
links_modified = [],
promise_list = [],
links_file_split,
links_subset,
i,
j;
if (links.length === 0) {
links_file_split = links_file.split("\n");
for (i = 0; i < links_file_split.length; i += 1) {
if (links_file_split[i].includes("https://") ||
links_file_split[i].includes("http://")) {
links_modified[i] = [links_file_split[i]];
}
}
for (i=0; i<links_modified.length; i+=concurrent_requests){
var links_subset = [];
if (i+concurrent_requests > links_modified.length) {
links_subset = links_modified.slice(i);
} else {
links_subset = links_modified.slice(i, i+concurrent_requests);
}
promise_list.push(gadget.parse_subset(links_subset, index_name));
if (links_modified.length === 0) {
showError("no links found in sitemap file : " + file_name);
return null;
}
return new RSVP.Queue()
.push(function(){
return RSVP.all(promise_list);
});
})
.declareMethod("parse_subset", function(links, index_name){
var gadget = this, promise_list = [], i;
for (i = 0; i < links.length; i+=1){
promise_list.push(gadget.get_sitemap_item(links[i], index_name, 1));
} else {
for (i = 0; i < links.length; i += 1) {
links_modified[i] = [links[i]
.getElementsByTagName('loc')[0].textContent];
}
return RSVP.all(promise_list);
}
for (i = 0; i < links_modified.length; i += concurrent_requests) {
links_subset = [];
if ((i + concurrent_requests) > links_modified.length) {
links_subset = links_modified.slice(i);
} else {
links_subset = links_modified.slice(i, (i + concurrent_requests));
}
promise_list.push(gadget.parse_subset(links_subset, index_name));
}
return new RSVP.Queue()
.push(function () {
return RSVP.all(promise_list);
});
})
.declareMethod("get_sitemap_item", function(link, index_name, attempt){
var gadget = this;
return new RSVP.Queue()
.push(function(){
var rng = Math.floor(Math.random() * Math.floor(proxies.length)),
proxy = proxies[rng];
return jIO.util.ajax({url : proxy + "?url=" + link});
.declareMethod("parse_subset", function (links, index_name) {
var gadget = this,
promise_list = [],
i;
for (i = 0; i < links.length; i += 1) {
promise_list.push(gadget.get_sitemap_item(links[i], index_name, 1));
}
return RSVP.all(promise_list);
})
.declareMethod("get_sitemap_item", function (link, index_name, attempt) {
var gadget = this;
return new RSVP.Queue()
.push(function () {
var rng = Math.floor(Math.random() * Math.floor(proxies.length)),
proxy = proxies[rng];
return jIO.util.ajax({url : proxy + "?url=" + link});
})
.push(function(page){
if (page !== undefined) {
return gadget.parse_sitemap_item(page, index_name);
}
else return null;
.push(function (page) {
if (page !== undefined) {
return gadget.parse_sitemap_item(page, index_name);
}
else
return null;
})
.push(undefined, function (my_error) {
if (my_error.target.status === 500 && attempt <= 10) {
return gadget.get_sitemap_item(link, index_name, attempt+1);
}
else showError(my_error);
if (my_error.target.status === 500 && attempt <= 10) {
return gadget.get_sitemap_item(link, index_name, attempt + 1);
}
else
showError(my_error);
});
})
.declareMethod("parse_sitemap_item", function(page, index_name){
var gadget = this,
item,
result;
item = new DOMParser().parseFromString(page.currentTarget.response, "text/html");
result = {
id : get_random_id(),
link : page.currentTarget.responseURL.slice(proxies[0].length),
title : item.title,
content : item.getElementsByTagName("body")[0].innerText
};
return gadget.add_to_index(result, index_name);
.declareMethod("parse_sitemap_item", function (page, index_name) {
var gadget = this,
item,
result;
item = new DOMParser()
.parseFromString(page.currentTarget.response, "text/html");
result = {
id : get_random_id(),
link : page.currentTarget.responseURL.slice(proxies[0].length),
title : item.title,
content : item.getElementsByTagName("body")[0].innerText
};
return gadget.add_to_index(result, index_name);
});
}(window, RSVP, rJS, jIO));
}(window, RSVP, rJS, jIO));
......@@ -242,7 +242,7 @@
</item>
<item>
<key> <string>serial</string> </key>
<value> <string>985.52218.37740.41728</string> </value>
<value> <string>987.29976.13991.60689</string> </value>
</item>
<item>
<key> <string>state</string> </key>
......@@ -260,7 +260,7 @@
</tuple>
<state>
<tuple>
<float>1597156560.08</float>
<float>1603350351.31</float>
<string>UTC</string>
</tuple>
</state>
......
......@@ -2,8 +2,13 @@
/*global window, RSVP, rJS, document, DOMParser*/
(function (window, RSVP, rJS, document, DOMParser) {
"use strict";
rJS(window)
var gadget_klass = rJS(window);
gadget_klass
.setState({
body_list: []
})
.declareAcquiredMethod("getUrlFor", "getUrlFor")
.declareMethod("showResultBox", function () {
......@@ -28,6 +33,7 @@
loader.style.display = "block";
loader = document.getElementById("searx-loading");
loader.style.display = "block";
return this.changeState({body_list: []});
})
.declareMethod("addItem", function (item, key) {
......@@ -36,11 +42,12 @@
title,
link,
link_par,
body,
body_list = [],
gadget = this;
list = document.getElementById("mynij-results");
list_item = document.createElement("LI");
list_item = document.createElement("li");
list_item.id = Math.random().toString(36) + '-' + Date.now();
if (item.link.startsWith("?url=")) {
item.link = item.link.substring(5);
}
......@@ -64,37 +71,67 @@
var loader = document.getElementById("mynij-loading");
loader.style.display = "none";
body = document.createElement('p');
body.className = "body";
//item.body = new DOMParser().parseFromString(
// item.body,
// "text/html"
// ).body.textContent || "";
if (key === "" || item.content === "") {
body.innerHTML = "";
list.appendChild(list_item);
} else {
//return gadget.cut_description(item.body, item.description, key)
return gadget.cut_description(item.content, key)
.push(function (result) {
var array = [...result.matchAll(key)],
i;
for (i = 0; i < array.length; i += 1) {
result = result.slice(0, array[i].index) +
"<b>" + result.slice(array[i].index,
array[i].index + array[i][0].length + 1
) +
"</b>" + result.slice(array[i].index +
array[i][0].length + 1);
}
body.innerHTML = result;
list_item.appendChild(body);
list.appendChild(list_item);
});
list.appendChild(list_item);
gadget.state.body_list.push({
element: list_item,
content: item.content,
key: key
});
}
});
})
.declareService(function () {
var gadget = this,
delay = 10,
item;
function add_description() {
if (!gadget.state.body_list.length) {
return new RSVP.Queue()
.push(function () {
return RSVP.delay(1000);
})
.push(function () {
return add_description();
});
}
item = gadget.state.body_list.shift();
return gadget.cut_description(item.content, item.key)
.push(function (description) {
var body,
regex = new RegExp(item.key, "ig"),
match_list,
key_map = {};
match_list = description.match(regex);
if (match_list) {
match_list.forEach(function (value) {
key_map[value] = '<b>' + value + '</b>';
});
regex = new RegExp(Object.keys(key_map).join("|"), "gi");
description = description.replace(regex, function (matched) {
return key_map[matched];
});
}
body = document.createElement('p');
body.className = "body";
body.innerHTML = description;
item.element.appendChild(body);
})
.push(function () {
return RSVP.delay(delay);
})
.push(function () {
return add_description();
});
}
return add_description();
})
.declareMethod("add_searx_no_result", function (msg) {
var error_html,
list_item = document.createElement("li"),
......
......@@ -242,7 +242,7 @@
</item>
<item>
<key> <string>serial</string> </key>
<value> <string>987.26221.22837.50619</string> </value>
<value> <string>987.30114.65034.20957</string> </value>
</item>
<item>
<key> <string>state</string> </key>
......@@ -260,7 +260,7 @@
</tuple>
<state>
<tuple>
<float>1603124223.76</float>
<float>1603357692.32</float>
<string>UTC</string>
</tuple>
</state>
......
......@@ -10,13 +10,5 @@
<script src="action_mynij_search_build_index.js" type="text/javascript"></script>
</head>
<body>
<div id = "gadget_parser"
data-gadget-url="gadget_mynij_parser.html"
data-gadget-scope="parser"
data-gadget-sandbox="public">
<div id = "gadget_model"
data-gadget-url="gadget_mynij_model.html"
data-gadget-scope="model"
data-gadget-sandbox="public">
</body>
</html>
\ No newline at end of file
(function (window, rJS, RSVP) {
"use strict";
rJS(window)
.declareAcquiredMethod("notifySubmitting", "notifySubmitting")
.declareAcquiredMethod("notifySubmitted", "notifySubmitted")
.declareAcquiredMethod("setSetting", "setSetting")
.declareAcquiredMethod("jio_get", "jio_get")
.declareAcquiredMethod("jio_post", "jio_post")
.allowPublicAcquisition("add_to_index", function(args){
return this.state.model_gadget.add_page(args[0], args[1]);
})
.allowPublicAcquisition("save_index", function(args){
return this.state.model_gadget.save_index(args[0]);
})
.ready(function(){
var gadget = this,
parser_gadget;
return gadget.getDeclaredGadget("parser")
.push(function(result){
parser_gadget = result;
return gadget.getDeclaredGadget("model");
})
.push(function(result){
gadget.changeState({
parser_gadget : parser_gadget,
model_gadget : result
});
});
})
.declareMethod("preRenderDocument", function (parent_options) {
var gadget = this;
return gadget.jio_get(parent_options.jio_key)
.push(function (parent_document) {
return parent_document;
});
.push(function (parent_document) {
return parent_document;
});
})
.declareMethod("handleSubmit", function (content_dict, parent_options) {
var return_submit_dict,
var return_submit_dict,
gadget = this,
list = parent_options.doc.links,
index_name = parent_options.doc.title,
property;
return new RSVP.Queue()
.push(function(){
return gadget.state.parser_gadget.parse_user_entry_list(list, index_name);
//return gadget.buildIndex(list, index_name);
// we build only one index per time for now
return gadget.setSetting("build_list", [{
links: list,
index_name: index_name,
id: Date.now()
}]);
})
.push(function(result){
.push(function(){
return_submit_dict = {
notify: {
message: "Index built : " + result + " item(s) added.",
status: "Status"
message: "Index " + index_name + " will start building...",
status: "success"
},
redirect: {
command: "raw",
options: {"url" : "#/" + parent_options.action_options.jio_key}
command: "display",
options: {page : "ojs_local_controller"}
}
}
};
return return_submit_dict;
})
});
});
}(window, rJS, RSVP));
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment