Commit 2c6df048 authored by Alain Takoudjou

erp5_web_mynij_search: fix memory leak on sitemap building

Building some sitemaps can take 3 GB of RAM or more and freeze the browser or tab. Reduce the number of AJAX queries running at the same time and make sure objects are released from memory after use.
parent c22bb2a4
...@@ -98,7 +98,7 @@ ...@@ -98,7 +98,7 @@
index_id = "index-" + index_name, index_id = "index-" + index_name,
new_index; new_index;
new_index = FlexSearch.create("memory", {stemmer : "en", filter : "en", encode: "icase"}); new_index = FlexSearch.create("memory", {stemmer : "en", filter : "en"});
return gadget.state.db.getAttachment(index_id, "ids", {"format": "text"}) return gadget.state.db.getAttachment(index_id, "ids", {"format": "text"})
.push(function (result) { .push(function (result) {
ids = result; ids = result;
......
...@@ -242,7 +242,7 @@ ...@@ -242,7 +242,7 @@
</item> </item>
<item> <item>
<key> <string>serial</string> </key> <key> <string>serial</string> </key>
<value> <string>987.28828.3761.54801</string> </value> <value> <string>987.34699.59880.60962</string> </value>
</item> </item>
<item> <item>
<key> <string>state</string> </key> <key> <string>state</string> </key>
...@@ -260,7 +260,7 @@ ...@@ -260,7 +260,7 @@
</tuple> </tuple>
<state> <state>
<tuple> <tuple>
<float>1603282834.35</float> <float>1603632793.79</float>
<string>UTC</string> <string>UTC</string>
</tuple> </tuple>
</state> </state>
......
...@@ -64,10 +64,10 @@ ...@@ -64,10 +64,10 @@
tmp_element.className = 'building'; tmp_element.className = 'building';
tmp_element.innerHTML = panel_template_progress({ tmp_element.innerHTML = panel_template_progress({
title: entry.index_name title: entry.index_name
}) });
parent_content.appendChild(tmp_element); parent_content.appendChild(tmp_element);
return gadget.build_index(link_list, entry.index_name) return gadget.build_index(link_list, entry.index_name)
.push(function (result) { .push(function () {
console.log("done building index"); console.log("done building index");
return gadget.save_index(entry.index_name) return gadget.save_index(entry.index_name)
.push(function (result) { .push(function (result) {
...@@ -109,7 +109,7 @@ ...@@ -109,7 +109,7 @@
.push(function (element) { .push(function (element) {
parent_content = element; parent_content = element;
return build(); return build();
}) });
}) })
/*.declareMethod("parse_user_entry_list", function (list, index_name) { /*.declareMethod("parse_user_entry_list", function (list, index_name) {
...@@ -147,6 +147,8 @@ ...@@ -147,6 +147,8 @@
return jIO.util.ajax({url : proxy + "?url=" + link}); return jIO.util.ajax({url : proxy + "?url=" + link});
}) })
.push(function (page) { .push(function (page) {
if (!page)
return;
if (link.includes("rss") || link.includes("atom")) { if (link.includes("rss") || link.includes("atom")) {
return gadget.parse_rss( return gadget.parse_rss(
page.currentTarget.response, page.currentTarget.response,
...@@ -158,12 +160,17 @@ ...@@ -158,12 +160,17 @@
link, link,
index_name); index_name);
} }
})
.push(undefined, function (error) {
console.log(error);
}); });
}) })
.declareMethod("parse_rss", function (rss_file, file_name, index_name) { .declareMethod("parse_rss", function (rss_file, file_name, index_name) {
var gadget = this, var gadget = this,
promise_list = [],
parser = new RSSParser({defaultRSS: 2.0}); parser = new RSSParser({defaultRSS: 2.0});
return new RSVP.Queue() return new RSVP.Queue()
.push(function () { .push(function () {
return parser.parseString(rss_file); return parser.parseString(rss_file);
...@@ -192,12 +199,19 @@ ...@@ -192,12 +199,19 @@
links = new DOMParser().parseFromString(links_file, "text/xml") links = new DOMParser().parseFromString(links_file, "text/xml")
.getElementsByTagName("url"), .getElementsByTagName("url"),
links_modified = [], links_modified = [],
promise_list = [],
links_file_split, links_file_split,
links_subset, links_subset,
i, i,
j; j,
queue = new RSVP.Queue(),
parse_subset;
parse_subset = function (subset) {
queue
.push(function () {
return gadget.parse_subset(subset, index_name);
});
};
if (links.length === 0) { if (links.length === 0) {
links_file_split = links_file.split("\n"); links_file_split = links_file.split("\n");
for (i = 0; i < links_file_split.length; i += 1) { for (i = 0; i < links_file_split.length; i += 1) {
...@@ -224,13 +238,11 @@ ...@@ -224,13 +238,11 @@
} else { } else {
links_subset = links_modified.slice(i, (i + concurrent_requests)); links_subset = links_modified.slice(i, (i + concurrent_requests));
} }
promise_list.push(gadget.parse_subset(links_subset, index_name)); parse_subset(links_subset);
} }
return new RSVP.Queue() links = null;
.push(function () { return queue;
return RSVP.all(promise_list);
});
}) })
.declareMethod("parse_subset", function (links, index_name) { .declareMethod("parse_subset", function (links, index_name) {
...@@ -260,7 +272,9 @@ ...@@ -260,7 +272,9 @@
return null; return null;
}) })
.push(undefined, function (my_error) { .push(undefined, function (my_error) {
if (my_error.target.status === 500 && attempt <= 10) { if (!my_error.target) {
showError(my_error);
} else if (my_error.target.status === 500 && attempt <= 3) {
return gadget.get_sitemap_item(link, index_name, attempt + 1); return gadget.get_sitemap_item(link, index_name, attempt + 1);
} }
else else
...@@ -270,17 +284,36 @@ ...@@ -270,17 +284,36 @@
.declareMethod("parse_sitemap_item", function (page, index_name) { .declareMethod("parse_sitemap_item", function (page, index_name) {
var gadget = this, var gadget = this,
item, regex,
title = "",
tmp_div,
content,
result; result;
item = new DOMParser() regex = page.currentTarget.response.match(/<title[^>]*>([^<]+)<\/title>/);
.parseFromString(page.currentTarget.response, "text/html"); if (regex)
title = regex[1];
regex = page.currentTarget.response
.match(/<body[^>]*>((.|[\n\r])*)<\/body>/gim);
if (regex) {
tmp_div = document.createElement("div");
tmp_div.innerHTML = regex[0];
}
if (!tmp_div)
return;
result = { result = {
id : get_random_id(), id : get_random_id(),
link : page.currentTarget.responseURL.slice(proxies[0].length), link : page.currentTarget.responseURL.slice(proxies[0].length),
title : item.title, title : title,
content : item.getElementsByTagName("body")[0].innerText content : tmp_div.textContent || tmp_div.innerText || ""
}; };
return gadget.add_to_index(result, index_name); return gadget.add_to_index(result, index_name)
.push(function () {
while (tmp_div.firstChild) {
tmp_div.removeChild(tmp_div.lastChild);
}
tmp_div = null;
result = null;
});
}); });
}(window, RSVP, rJS, jIO, Handlebars)); }(window, RSVP, rJS, jIO, Handlebars));
...@@ -242,7 +242,7 @@ ...@@ -242,7 +242,7 @@
</item> </item>
<item> <item>
<key> <string>serial</string> </key> <key> <string>serial</string> </key>
<value> <string>987.31551.51611.38178</string> </value> <value> <string>987.37780.16896.38656</string> </value>
</item> </item>
<item> <item>
<key> <string>state</string> </key> <key> <string>state</string> </key>
...@@ -260,7 +260,7 @@ ...@@ -260,7 +260,7 @@
</tuple> </tuple>
<state> <state>
<tuple> <tuple>
<float>1603444202.17</float> <float>1603817569.05</float>
<string>UTC</string> <string>UTC</string>
</tuple> </tuple>
</state> </state>
......
...@@ -198,12 +198,12 @@ ...@@ -198,12 +198,12 @@
.declareMethod("cut_description", function (body, key) { .declareMethod("cut_description", function (body, key) {
//function(body, description, key) //function(body, description, key)
var result, var result,
max_length = 250,
key_list = key.split(' '), key_list = key.split(' '),
search_key = key; search_key = key;
function extract_description(description) { function extract_description(description) {
var regex, var regex,
max_length = 250,
start, start,
end, end,
count = 0, count = 0,
...@@ -241,11 +241,7 @@ ...@@ -241,11 +241,7 @@
if (result === null) { if (result === null) {
//if (description !== "") return description; else.... //if (description !== "") return description; else....
result = new RegExp('[^.?!]*[.?!]').exec(body); return body.slice(0, max_length) + "...";
if (result === null)
return "";
else
return result[0];
} }
else else
return extract_description(result[0]); return extract_description(result[0]);
......
...@@ -242,7 +242,7 @@ ...@@ -242,7 +242,7 @@
</item> </item>
<item> <item>
<key> <string>serial</string> </key> <key> <string>serial</string> </key>
<value> <string>987.30114.65034.20957</string> </value> <value> <string>987.31604.11529.7168</string> </value>
</item> </item>
<item> <item>
<key> <string>state</string> </key> <key> <string>state</string> </key>
...@@ -260,7 +260,7 @@ ...@@ -260,7 +260,7 @@
</tuple> </tuple>
<state> <state>
<tuple> <tuple>
<float>1603357692.32</float> <float>1603447348.31</float>
<string>UTC</string> <string>UTC</string>
</tuple> </tuple>
</state> </state>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment