Commit 42119f6a authored by Alexandra Rogova

index received from db is bad

parent 3ff3b171
jsSHA @ 766f8ff7
Subproject commit 766f8ff7d926347b008a252a41b06565df747ac5
rusha @ 7dc20211
Subproject commit 7dc2021195219e54b2696af524141696f35694dd
......@@ -6,7 +6,8 @@
<script src="../../external/jio/dist/jio-latest.js"></script>
<script src="../../external/renderjs/dist/renderjs-latest.js"></script>
<script src="../../external/flexsearch/flexsearch.js"></script>
<script src="../../external/msgpack-lite/dist/msgpack.min.js"></script>
<script src="../../external/msgpack-lite/dist/msgpack.min.js"></script>
<script src="../../external/jsSHA/src/sha1.js"></script>
<script src="../js/gadget_model.js"></script>
</head>
<body>
......
......@@ -18,31 +18,30 @@
.setState({
to_load: [
"44_svt.xml", //135 urls
// "allemandfacile.xml", //650 urls
// "anglaisfacile.xml", //567 urls
// "bescherelle.xml", //60 urls
// "codeacademy.xml", //28 urls
// "francaisfacile.xml", //1119 urls
//"44_svt.xml", //135 urls
//"allemandfacile.xml", //650 urls
//"anglaisfacile.xml", //567 urls
"codeacademy.xml", //28 urls
//"francaisfacile.xml", //1119 urls
//"hgeo_college.xml", //227 urls
//"histoirencours.xml", //1415 urls
//"italienfacile.xml", //1477 urls
//"jerevise.xml", //918 urls
// "histoirencours.xml", //1415 urls
// "italienfacile.xml", //1478 urls
// "jerevise.xml", //919 urls
// "junior_science_et_vie.xml", //532 urls
// "kmusic.xml", //107 urls
//"kmusic.xml", //107 urls
// "larousse.xml", //4563 urls
// //"letudiant.xml", //41649 urls
// "lewebpedagogique.xml", //298 urls
// //"livrespourtous.xml", //12061 urls
// "mathovore.xml", //2221 urls
// "monanneeaucollege.xml", //121 urls
// "mathovore.xml", //2222 urls
//"monanneeaucollege.xml", //121 urls
// "nosdevoirs.xml", //462 urls
// "physagreg.xml", //150 urls
// "physique_chimie_college.xml", //282 urls
// "reviser_brevet.xml", //229 urls
//"physagreg.xml", //150 urls
//"physique_chimie_college.xml", //282 urls
//"reviser_brevet.xml", //229 urls
// "soutien67.xml", //1604 urls
// //"superprof.xml", //12296 urls
// "technologieaucollege27.xml", //129 urls
//"technologieaucollege27.xml", //129 urls
// "espagnolfacile.xml", //3352 urls
// "vivelessvt.xml", //1257 urls
// // TEST SITEMAPS TO FILL INDEX
......@@ -67,17 +66,17 @@
// "vox.xml", //1194 urls
// "cbsnews.xml", //1260 urls
// "mirror.xml", //3528 urls
// "abcnews.xml", //1077 urls
// "abcnews.xml", //971 urls
// "lequipe.xml", //3455 urls
// "rugbyrama.xml", //1817 urls
//"rugbyrama.xml", //1817 urls
// "elle.xml", //3532 urls
// "figaro.xml", //2965 urls
// "lepoint.xml", //3747 urls
// "telerama.xml", //2593 urls
// "liberation.xml", //819 urls
// "lemonde.xml", //3517 urls
// "leparisien.xml", //2189 urls
// "latribune.xml" //3190 urls
//"leparisien.xml", //2189 urls
//"latribune.xml" //3190 urls
]
})
......@@ -118,16 +117,6 @@
return RSVP.all(promise_list);
}
});
/* return gadget.is_db_empty()
.push(function(empty){
if (empty) {
for (var i=0; i<gadget.state.to_load.length; i+=1){
promise_list.push(gadget.load_file("../../../crawler_test/" + gadget.state.to_load[i]));
}
return RSVP.all(promise_list);
}
});*/
})
.declareMethod("load_file", function(file_path, file_name){ //OK
......
......@@ -2,23 +2,18 @@
/*global window, RSVP, rJS, jIO*/
(function (window, document, RSVP, rJS, jIO) {
"use strict";
rJS(window)
.ready(function(){
var index, db;
var index = FlexSearch.create("memory");
index = FlexSearch.create("memory");
db = jIO.createJIO(
{
type : "query",
sub_storage : {
type : "uuid",
sub_storage : {
type : "indexeddb",
database : "mynij"
}
}
type : "indexeddb",
database : "mynij"
}
);
......@@ -45,24 +40,24 @@
})
.declareMethod("add_page", function(page_info){ //page_info = {link, title, description, item}
console.log(this.state.index.info());
var gadget = this,
tmp;
tmp = page_info;
tmp.portal_type = "page";
return gadget.state.db.put(page_info.link, tmp)
.push(function(){
var defer = RSVP.defer();
/*var defer = RSVP.defer();
gadget.state.index.add(page_info.link, page_info.title + " " + page_info.item, defer.resolve.bind(defer));
return defer.promise;
})
.push(function(){
return gadget._save_index();
return defer.promise;*/
return gadget.state.index.add(page_info.link, page_info.title + " " + page_info.item);
});
})
.declareMethod("loaded", function(file_name){
var gadget = this,
var gadget = this,
id;
console.log(file_name + " done loading");
return gadget.state.db.get("loaded")
.push(function(result){
......@@ -71,29 +66,183 @@
return gadget.state.db.put("loaded", tmp);
})
.push(undefined, function (my_error) {
console.log(my_error);
var tmp = {};
tmp[file_name] = true;
return gadget.state.db.put("loaded", tmp);
})
.always(function(){
return gadget._save_index();
});
})
.declareMethod("_save_index", function(){ //OK
.declareMethod("_save_index", function(){
/* var gadget = this;
console.log("getting index");
var serialized = this.state.index.export_test();
return this.state.db.put("index", serialized);
console.log("Index received. Saving");
return this.state.db.put("index", this.state.index.info())
.push(function(){
console.log(1);
//return gadget.state.db.putAttachment("index", "ids", new Blob([serialized.ids], {type : "application/json"}));
return gadget.state.db.putAttachment("index", "ids", new Blob([serialized.ids], {type : "text/plain"}));
})
.push(function(){
console.log(2);
//return gadget.state.db.putAttachment("index", "map", new Blob([serialized.map], {type : "application/json"}));
var shaObj = new jsSHA("SHA-1", "TEXT");
shaObj.update(serialized.map);
console.log("map sha1 : " + shaObj.getHash("HEX"));
return gadget.state.db.putAttachment("index", "map", new Blob([serialized.map], {type : "text/plain"}));
})
.push(function(){
console.log(3);
//return gadget.state.db.putAttachment("index", "ctx", new Blob([serialized.ctx], {type : "application/json"}));
return gadget.state.db.putAttachment("index", "ctx", new Blob([serialized.ctx], {type : "text/plain"}));
})
.push(function(){
console.log("index saved");
})
.push(undefined, function (my_error) {
throw my_error;
//console.log(my_error);
});*/
var gadget = this,
serialized,
i,
j = 0,
promise_list = [];
console.log("getting index");
serialized = this.state.index.export_test();
console.log("Index received. Saving");
return this.state.db.put("index_map", {})
.push(function(){
console.log(serialized.map.slice(0, 1000));
for (i = 0; i < serialized.map.length; i += 10000){
promise_list.push(gadget.state.db.putAttachment("index_map", j+"", new Blob([serialized.map.slice(i, i+10000)], {type : "application/json"})));
j+=1;
}
j = 0;
return RSVP.all(promise_list);
})
.push(function(){
return gadget.state.db.put("index_ids", {});
})
.push(function(){
promise_list = [];
for (i = 0; i < serialized.map.length; i += 10000){
promise_list.push(gadget.state.db.putAttachment("index_ids", j+"", new Blob([serialized.ids.slice(i, i+10000)], {type : "application/json"})));
j+=1;
}
j = 0;
return RSVP.all(promise_list);
})
.push(function(){
return gadget.state.db.put("index_ctx", {});
})
.push(function(){
promise_list = [];
for (i = 0; i < serialized.ctx.length; i += 10000){
promise_list.push(gadget.state.db.putAttachment("index_ctx", j+"", new Blob([serialized.map.slice(i, i+10000)], {type : "application/json"})));
j+=1;
}
return RSVP.all(promise_list);
})
.push(function(){
console.log("index saved");
})
.push(undefined, function (my_error) {
console.log(my_error);
});
})
.declareMethod("search", function(query){
return this.state.index.search(query);
})
.declareMethod("_load_index", function(msgpack){ //OK
var gadget = this;
return gadget.state.db.get("index")
.push(function(index){
gadget.state.index.import_test(index.ids, index.map, index.ctx);
.declareMethod("_load_index", function(msgpack){
/* var gadget = this,
ids,
map;
return gadget.state.db.getAttachment("index", "ids", {"format": "text"})
.push(function(result){
ids = result;
return gadget.state.db.getAttachment("index", "map", {"format": "text"});
})
.push(function(result){
map = result;
var shaObj = new jsSHA("SHA-1", "TEXT");
shaObj.update(map);
console.log("map sha1 : " + shaObj.getHash("HEX"));
return gadget.state.db.getAttachment("index", "ctx", {"format": "text"});
})
.push(function(result){
gadget.state.index.import_test(ids, map, result);
console.log("index imported from memory");
})
.push(undefined, function (my_error) {console.log(my_error)});*/
var gadget = this,
ids,
map,
ctx,
promise_list = [],
i;
return gadget.state.db.allAttachments("index_ids")
.push(function(result){
if (Object.keys(result).length !== 0){
for (i = 0; i < Object.keys(result).length; i+=1){
promise_list.push(gadget.state.db.getAttachment("index_ids", i+"", {format : "text"}));
}
return RSVP.all(promise_list);
} else {
return null;
}
})
.push(function(result){
if (result === null) ids = "";
else ids = result.join("");
return gadget.state.db.allAttachments("index_map");
})
.push(function(result){
if (Object.keys(result).length !== 0){
for (i = 0; i < Object.keys(result).length; i+=1){
promise_list.push(gadget.state.db.getAttachment("index_map", i+"", {format : "text"}));
}
return RSVP.all(promise_list);
} else {
return null;
}
})
.push(function(result){
console.log(result[0]);
if (result === null) map = "";
else map = result.join("");
console.log(map.slice(0, 1000));
return gadget.state.db.allAttachments("index_ctx");
})
.push(function(result){
if (Object.keys(result).length !== 0){
for (i = 0; i < Object.keys(result).length; i+=1){
promise_list.push(gadget.state.db.getAttachment("index_ctx", i+"", {format : "text"}));
}
return RSVP.all(promise_list);
} else {
return null;
}
})
.push(function(result){
if (result === null) ctx = "";
else ctx = result.join("");
return gadget.add_index(ids, map, ctx);
})
.push(function(result){
console.log("index imported from memory");
})
.push(undefined, function (my_error) {});
.push(undefined, function (my_error) {
console.log(my_error);
});
})
.declareMethod("is_empty", function(){
......
......@@ -14,6 +14,7 @@
return;
}
if (argument_list.length > 0) {
console.log(argument_list.length);
function_used.apply(context, argument_list.shift())
.then(function(result) {
pushAndExecute(global_defer);
......@@ -47,11 +48,12 @@
links = new DOMParser().parseFromString(links_file, "text/xml").getElementsByTagName("url"),
links_modified = [],
i;
console.log(file_name + " : " + links.length);
for (i=0; i<links.length; i+=1){
links_modified[i] = [links[i].getElementsByTagName('loc')[0].textContent];
}
return new RSVP.Queue().push(function() {
return dispatchQueue(gadget, gadget._get, links_modified, 1);
return dispatchQueue(gadget, gadget._get, links_modified, 3);
})
.push(function(){
return gadget.add_file(file_name);
......@@ -62,7 +64,7 @@
var gadget = this;
return new RSVP.Queue()
.push(function(){
var rng = Math.floor(Math.random() * Math.floor(10));
var rng = Math.floor(Math.random() * Math.floor(10));
if (rng % 2 === 0 ) return jIO.util.ajax({url : "https://softinst116265.host.vifib.net/erp5/ERP5Site_getHTTPResource?url=" + link});
else return jIO.util.ajax({url : "https://softinst116446.host.vifib.net/erp5/ERP5Site_getHTTPResource?url=" + link});
})
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment