new commands for staging and reset

f7de6621 · roqueporchetto@gmail.com · 963e3e0d · f7de6621 · f7de6621 · f7de6621
Commit f7de6621 authored Oct 03, 2018 by roqueporchetto@gmail.com
8 changed files
--- a/ebulk
+++ b/ebulk
@@ -22,6 +22,9 @@ GREEN='\033[0;32m'
 ORANGE='\033[0;33m'
 NC='\033[0m'
 DEFAULT_CHUNK_SIZE="50"
+STAGE_ADD="add"
+STAGE_REMOVE="remove"
+STAGE_RESET="reset"
 function helpReadme {
    echo -e "[INFO] For help, please run '${GREEN}ebulk --help${NC}'"
@@ -42,9 +45,9 @@ function checkParameters {
 	helpReadme >&2; return 1
    fi
    if [ "$STORAGE" = "" ] ; then
-      if [ ! -d $DATASET_DIR ]; then
+      if [ ! -d "$DATASET_DIR" ]; then
          echo
-          mkdir $DATASET_DIR 2>/dev/null
+          mkdir "$DATASET_DIR" 2>/dev/null
 	  if [ ! $? -eq 0 ]; then
              echo
              echo -e "${ORANGE}[ERROR] Dataset path not found."
@@ -55,14 +58,19 @@ function checkParameters {
      fi
      EBULK_DATASET_FILE="$DATASET_DIR/.ebulk_dataset"
      if [[ $DATASET_DIR != $REFERENCE ]]; then
+	  if [ "$REFERENCE" = "." ] ; then
+		REFERENCE=$(basename "$DATASET_DIR")
+	  fi
 	  DATA_SET=$REFERENCE
-	  echo $REFERENCE > $EBULK_DATASET_FILE 2>/dev/null
+	  echo $REFERENCE > "$EBULK_DATASET_FILE" 2>/dev/null
      else
-	  if [ -f $EBULK_DATASET_FILE ]; then
+	  if [ -f "$EBULK_DATASET_FILE" ]; then
-	      DATA_SET=$(cat "$DATASET_DIR/.ebulk_dataset")
+	      DATA_SET=$(cat "$DATASET_DIR/.ebulk_dataset" 2>/dev/null)
 	  else
 	      DATA_SET=$(basename "$DATASET_DIR")
-	      echo $DATA_SET > $EBULK_DATASET_FILE 2>/dev/null
+	      if [ "$DATA_SET" != "." ] ; then
+		SAVE_DATASET_NAME="TRUE"
+	      fi
 	  fi
      fi
    else
@@ -70,14 +78,19 @@ function checkParameters {
    fi
    re='^[A-Za-z][_A-Za-z.0-9-]*$'
    if ! [[ $DATA_SET =~ $re ]] ; then
+	if [ "$DATA_SET" = "." ] && [[ -z "$STORAGE" ]] ; then
+		echo
+		echo -e "${ORANGE}[ERROR] You are not in a dataset directory ${GREEN}'$DATA_SET'${ORANGE}.${NC}"
+		echo
+	else
 		echo
 		echo -e "${ORANGE}[ERROR] Error in argument: invalid dataset name ${GREEN}'$DATA_SET'${ORANGE}.${NC}"
 		echo -e "${ORANGE}[ERROR] Dataset name must start with a letter, and only alphanumerics, dots ( . ), underscores ( _ ) and hyphens ( - ) are allowed.${NC}"
 		echo
-	if [ -f $EBULK_DATASET_FILE ]; then
-		rm -f ${EBULK_DATASET_FILE}
 	fi
 	helpReadme >&2; return 1
+    elif [ ! -z "$SAVE_DATASET_NAME" ]; then
+	echo $DATA_SET > "$EBULK_DATASET_FILE" 2>/dev/null
    fi
    if [ ! -z "$CHUNK" ]; then
        re='^[0-9]+$'
@@ -143,6 +156,7 @@ function updateConfigFile {
    DOWN_URL=\"$DOWN_URL\"
    ING_URL=\"$ING_URL\"
    STORAGE=\"$STORAGE\"
+    STATUS=\"$STATUS\"
    S3_BUCKET=\"$S3_BUCKET\"
    S3_PREFIX=\"$S3_PREFIX\"
@@ -174,6 +188,7 @@ function runProcess {
        return 1
    fi
    echo -e "[INFO] Dataset: ${GREEN}$DATA_SET${NC}"
+    if [ -z "$STATUS" ]; then
    	if [ ! -z "$CHUNK" ]; then
 	    if [ "$CHUNK" -eq "0" ]; then
 	    	echo "[INFO] Default chunk size: $DEFAULT_CHUNK_SIZE Mb."
@@ -181,17 +196,20 @@ function runProcess {
 	    	echo "[INFO] Chunk size set in $CHUNK Mb."
 	    fi
    	fi
+    fi
+    if [ -z "$STATUS" ]; then
    	if ! askCredentials; then
            return 1
    	fi
+    fi
    echo
-    echo "[INFO] Supplier: $USER"
    updateConfigFile
    echo "[INFO] Starting operation..."
    if [ ! -d $LOG_DIR ]; then
        mkdir $LOG_DIR 2>/dev/null
    fi
    $embulk run -L $TOOL_PATH/embulk-wendelin-dataset-tool $FILE $DIFF 2> "$LOG_DIR/error.log" || {
+	if [ -z "$STATUS" ]; then
 	      echo
 	      echo -e "${ORANGE}[ERROR] Embulk tool stopped its execution.${NC}"
 	      if [ "$STORAGE" != \"\" ] ; then
@@ -200,6 +218,7 @@ function runProcess {
 	      fi
 	      echo "[INFO] Please check the logs in '$LOG_DIR' directory for more details."
 	      echo
+	fi
    }
 }
@@ -383,17 +402,37 @@ function askS3parameters {
    fi
 }
-# WELCOME
+function stage {
-echo
+	EBULK_DATASET_FILE="./.ebulk_dataset"
-echo "   #########################################################################"
+	if [ ! -f "$EBULK_DATASET_FILE" ]; then
-echo "   ############## WELCOME TO EBULK INGESTION-DOWNLOAD TOOL #################"
+		echo
-echo "   ########### This tool relies on Embulk software and Java 8 ##############"
+		echo -e "${ORANGE}[ERROR] You are not in a dataset directory."
-echo "   ######## Do not forget to check the README before use this tool #########"
+		echo -e "[INFO] $OP operation can only be run within a root dataset directory.${NC}"
-echo "   ############## In case of any problem, please contact us  ###############"
+		echo
-echo "   ####################### roqueporchetto@gmail.com ########################"
+		helpReadme >&2; exit
-echo "   ###################### Happy ingestion-download ! #######################"
+	fi
-echo "   #########################################################################"
+	if [[ $PATH_TO_ELEMENT = "" ]]; then
-echo
+		echo
+		echo -e "${ORANGE}[ERROR] Nothing specified, nothing to $OP."
+		echo -e "[INFO] Please specify a valid path.${NC}"
+		echo
+		helpReadme >&2; exit
+	fi
+	STAGE_FILE="./.staged"
+}
+function welcome {
+	echo
+	echo "   #########################################################################"
+	echo "   ############## WELCOME TO EBULK INGESTION-DOWNLOAD TOOL #################"
+	echo "   ########### This tool relies on Embulk software and Java 8 ##############"
+	echo "   ######## Do not forget to check the README before use this tool #########"
+	echo "   ############## In case of any problem, please contact us  ###############"
+	echo "   ####################### roqueporchetto@gmail.com ########################"
+	echo "   ###################### Happy ingestion-download ! #######################"
+	echo "   #########################################################################"
+	echo
+}
 if [ ! -d $EBULK_DATA_PATH ]; then
 	mkdir $EBULK_DATA_PATH 2>/dev/null
@@ -428,14 +467,21 @@ while [ "$1" != "" ]; do
 	-h | --help )           	cat $TOOL_PATH/help.md
 					exit
 					;;
+	-e | --examples )           	cat $TOOL_PATH/example.md
+					exit
+					;;
 	-r | --readme )              	less $TOOL_PATH/README.md
 					exit
 					;;
-        pull )				OPERATION=$1
+	status | push | pull )		OPERATION=$1
 					;;
-        push )				OPERATION=$1
+	add | remove | reset )		OPERATION=$1
+					shift
+					PATH_TO_ELEMENT=$1
+					REFERENCE="."
 					;;
 	*)				if [[ $REFERENCE != $1 ]]; then
+						echo
 						echo -e "${ORANGE}[ERROR] Invalid parameter '$1'.${NC}"
 						echo
 						helpReadme >&2; exit
@@ -444,14 +490,15 @@ while [ "$1" != "" ]; do
    shift
 done
+for ELEMENT in '' '-d' '--directory' '-s' '--storage' '-cs' '--custom-storage' '-a' '--advanced' '-c' '--chunk'; do
+  if [ "$ELEMENT" = "$REFERENCE" ]; then
+	REFERENCE="."
+  fi
+done
 if [[ $OPERATION = "" ]]; then
-	echo -e "${ORANGE}[ERROR] Please specify a valid operation.${NC}"
 	echo
-	helpReadme >&2; exit
+	echo -e "${ORANGE}[ERROR] Please specify a valid operation.${NC}"
-fi
-if [[ $REFERENCE = "" ]]; then
-	echo -e "${ORANGE}[ERROR] Dataset not specified."
-	echo -e "[INFO] Please specify a valid dataset.${NC}"
 	echo
 	helpReadme >&2; exit
 fi
@@ -463,7 +510,41 @@ if [[ $CHUNK = "" ]]; then
 fi
 case $OPERATION in
+    add)
+	OP=$STAGE_ADD
+	stage
+	ELEMENT="./$PATH_TO_ELEMENT"
+	if [ -d "$ELEMENT" ] || [ -f "$ELEMENT" ]; then
+		echo "$OP;$PATH_TO_ELEMENT" >> $STAGE_FILE
+	else
+		echo
+		echo -e "${ORANGE}[ERROR] '$PATH_TO_ELEMENT' did not match any files or directories."
+		echo -e "[INFO] Please specify a valid path.${NC}"
+		echo
+		helpReadme >&2; exit
+	fi
+	;;
+    remove)
+	OP=$STAGE_REMOVE
+	stage
+	echo "$OP;$PATH_TO_ELEMENT" >> $STAGE_FILE
+	;;
+    reset)
+	OP=$STAGE_RESET
+	stage
+	echo "$OP;$PATH_TO_ELEMENT" >> $STAGE_FILE
+	;;
+    status)
+	welcome
+	STATUS=$OPERATION
+	FILE=$ING_FILE
+	TEMPLATE_FILE=$ING_TEMPLATE_FILE
+	echo "### DATASET STATUS ###"
+	echo
+	runProcess
+	;;
    pull)
+	welcome
 	FILE=$DOWN_FILE
 	TEMPLATE_FILE=$DOWN_TEMPLATE_FILE
 	if [ "$STORAGE" != "" ] ; then
@@ -480,6 +561,7 @@ case $OPERATION in
 	runProcess
 	;;
    push)
+	welcome
 	MESSAGE="storage: $STORAGE"
 	if [ "$CUSTOM" = true ] ; then
 		FILE=$CUSTOM_ING_FILE

--- a/ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/dataset_utils.rb
+++ b/ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/dataset_utils.rb
--- a/ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/input/fif.rb
+++ b/ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/input/fif.rb
--- a/ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/input/wendelin.rb
+++ b/ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/input/wendelin.rb
@@ -7,9 +7,6 @@ module Embulk
    class Wendelininput < InputPlugin
-      CHUNK_SIZE = 50000000 #50mb
-      MEGA = 1000000
      UPDATE = "U"
      RESUME = "R"
      DOWNLOAD = "D"
@@ -17,34 +14,17 @@ module Embulk
      Plugin.register_input("wendelin", self)
-      def self.warnConflicts(remote_streams, data_set, action)
+      def self.warnConflicts(remote_streams, data_set)
 	if not remote_streams.empty?
-	  paths = [@data_set_directory.end_with?("/") ? @data_set_directory : @data_set_directory + "/"]
+	  conflicts = @dataset_utils.getLocalConflicts(remote_streams, data_set)
-	  local_files = paths.map {|path|
-				      next [] unless Dir.exist?(path)
-				      Dir[(path + '/**/*').gsub! '//', '/']
-				    }.flatten.select{ |file| File.file?(file) }
-	  local_changes, a, b, c = @dataset_utils.getLocalChanges(local_files, data_set)
-	  data_set = @data_set.end_with?("/") ? @data_set : @data_set + "/"
-	  remote_changes = remote_streams.map { |remote|
-		remote = @data_set_directory + remote["reference"].reverse.sub("/".reverse, ".".reverse).reverse.sub(data_set, "")
-		remote.end_with?(".none") ? remote[0...-5] : remote
-	  }
-	  conflicts = local_changes.select{ |conflict| remote_changes.include? conflict["path"] }.map{ |conflict| conflict["path"] }
-	  # check scenario where the last version file exists but not in report
-	  # (due download interrumped right after save the file but before add it to report)
-	  if action == RESUME and conflicts.length == 1 and File.exist?(conflicts[0])
-	    @logger.warn("The file #{conflicts[0]} was detected as false positive conflict and it was not informed to user.")
-	    conflicts = []
-	  end
 	  if not conflicts.empty?
 	    @logger.warn("CONFLICT: there are conflicts with some of your local changes.", print=TRUE)
 	    puts "** press key **"
 	    option = gets
-	    @logger.warn("Conflicted files:", print=TRUE)
+	    @logger.warn("Conflicted changes:", print=TRUE)
 	    @logger.warn(conflicts, print=TRUE)
 	    puts
-	    @logger.warn("Your local conflicted files will be overwritten by download.", print=TRUE)
+	    @logger.warn("Your local conflicted changes will be overwritten by current download.", print=TRUE)
 	    @logger.warn("Do you want to continue? (y/n)", print=TRUE)
 	    option = gets
 	    option = option.chomp
@@ -52,6 +32,7 @@ module Embulk
 	      @logger.info("Download cancelled by user.", print=TRUE)
 	      @logger.abortExecution(error=FALSE)
 	    end
+	    @dataset_utils.deleteStagedFile()
 	  end
 	end
      end
@@ -79,26 +60,16 @@ module Embulk
        end
        case option
          when action
+	    @logger.info("Checking remote changes and posible local conflicts...", print=TRUE) if action != RESUME
 	    task['data_streams'] = @dataset_utils.getRemoteChangedDataStreams(task['data_streams'])
-		self.warnConflicts(task['data_streams'], task['data_set'], action)
+	    self.warnConflicts(task['data_streams'], task['data_set']) if action != RESUME
 	    @dataset_utils.deleteCompletedFile()
            if task['data_streams'].empty?
-	          @logger.info("No new files in dataset.", print=TRUE)
              @logger.info("Your downloaded dataset is already up to date.", print=TRUE)
            end
          when DOWNLOAD
-		ebulk_file = @data_set_directory + "/.ebulk_dataset"
+	    @logger.info("Checking remote files and posible local conflicts...", print=TRUE)
-		ebulk_file_content = ""
+	    self.warnConflicts(task['data_streams'], task['data_set'])
-		if File.file?(ebulk_file)
-		  ebulk_file_content = File.read(ebulk_file)
-		end
-		FileUtils.rm_rf(@data_set_directory)
-		unless File.directory?(@data_set_directory)
-		  FileUtils.mkdir_p(@data_set_directory)
-		end
-		if ebulk_file_content != ""
-		  File.open(ebulk_file, 'w') { |file| file.write(ebulk_file_content) }
-		end
 	    @dataset_utils.deleteCompletedFile()
 	    @dataset_utils.createReportFile()
          when ABORT
@@ -114,18 +85,12 @@ module Embulk
 	  @erp5_url = config.param('erp5_url', :string)
 	  @data_set = config.param('data_set', :string)
 	  @logger.info("Dataset name: #{@data_set}")
-	  if @data_set == '$DATA_SET'
-	    @logger.error("There was an error setting the configuration file", print=TRUE)
-	    @logger.info("Please try manual download or update manually the download configuration file.", print=TRUE)
-	    @logger.abortExecution()
-	  end
 	  @user = config.param("user", :string, defualt: nil)
 	  @logger.info("User: #{@user}")
 	  @password = config.param("password", :string, default: nil)
-	  @chunk_size = config.param('chunk_size', :float, default: 0) * MEGA
+	  @chunk_size = config.param('chunk_size', :float, default: 0) * DatasetUtils::MEGA
 	  @output_path = config.param("output_path", :string,  :default => nil)
-	  if File.directory?(@output_path)
+	  if not File.directory?(@output_path)
-	  else
 	    @logger.error("Output directory not found.", print=TRUE)
 	    @logger.abortExecution()
 	  end
@@ -141,31 +106,21 @@ module Embulk
 		 'tool_dir' => @tool_dir
 	       }
 	  if task['chunk_size'] == 0
-	    task['chunk_size'] = CHUNK_SIZE
+	    task['chunk_size'] = DatasetUtils::CHUNK_SIZE
 	  end
-	  @logger.info("Chunk size set in #{task['chunk_size']/MEGA}MB")
+	  @logger.info("Chunk size set in #{task['chunk_size']/DatasetUtils::MEGA}MB")
-	  @data_set_directory = @output_path.end_with?("/") ? @output_path : @output_path + "/"
+	  @dataset_utils = DatasetUtils.new("")
-	  task['data_set_directory'] = @data_set_directory
+	  task['data_set_directory'] = @dataset_utils.appendSlashTo(@output_path)
+	  @data_set_directory = task['data_set_directory']
 	  @dataset_utils = DatasetUtils.new(@data_set_directory)
 	  @logger.info("Getting remote file list from dataset '#{@data_set}'...", print=TRUE)
 	  data_stream_list = @wendelin.getDataStreams(@data_set)
-	  n_retry = 0
-	  while data_stream_list["status_code"] == 2 and n_retry < 6
-	    sleep 10
-	    data_stream_list = @wendelin.getDataStreams(@data_set)
-	    n_retry += 1
-	  end
 	  if data_stream_list["status_code"] == 0
 	    if data_stream_list["result"].empty?
 	      @logger.error("No valid data found for data set " + @data_set, print=TRUE)
 	      @logger.abortExecution(error=FALSE)
 	    end
 	    task['data_streams'] = data_stream_list["result"]
-	  elsif data_stream_list["status_code"] == 2
-	    @logger.error("Dataset '#{@data_set}' has files recently ingested waiting for processing.", print=TRUE)
-	    @logger.error("Please retry in some minutes.", print=TRUE)
-	    @logger.abortExecution(error=FALSE)
 	  else
 	    @logger.error(data_stream_list["error_message"], print=TRUE)
 	    @logger.abortExecution()
@@ -189,13 +144,9 @@ module Embulk
 	      self.askUserForAction(task, action=UPDATE)
 	    end
 	  else
-	    dir_entries = Dir.entries(@data_set_directory).length
+	    if not @dataset_utils.dirEmpty(@data_set_directory)
-	    if File.file?(@data_set_directory+"/.ebulk_dataset")
-	      dir_entries -= 1
-	    end
-	    if dir_entries > 2
 	      puts
-	      @logger.info("Dataset download directory is not empty! It will be overwritten: " + @data_set_directory, print=TRUE)
+	      @logger.info("Dataset download directory is not empty! Its files could be overwritten: " + @data_set_directory, print=TRUE)
 	      @logger.info("Continue with download? (y/n)", print=TRUE)
 	      option = gets
 	      option = option.chomp
@@ -203,6 +154,8 @@ module Embulk
 	        @logger.info("Download cancelled by user.", print=TRUE)
 	        @logger.abortExecution(error=FALSE)
 	      end
+	      @logger.info("Checking remote files and posible local conflicts...", print=TRUE)
+	      self.warnConflicts(task['data_streams'], task['data_set'])
 	    end
 	    @dataset_utils.createReportFile()
 	  end
@@ -225,18 +178,7 @@ module Embulk
      def self.resume(task, columns, count, &control)
 	@logger = LogManager.instance()
        task_reports = yield(task, columns, count)
-	if task_reports.any?
+	@dataset_utils.showTaskReport(task_reports)
-	  @logger.info("Reports:", print=TRUE)
-	  if task_reports.length > 15
-	    @logger.info(task_reports[0, 5], print=TRUE)
-	    @logger.info(".....", print=TRUE)
-	    @logger.info(task_reports[task_reports.length-5, task_reports.length-1], print=TRUE)
-	  else
-	    @logger.info(task_reports, print=TRUE)
-	  end
-	  @logger.info("Full task report:")
-	  @logger.info(task_reports)
-	end
        next_config_diff = task_reports.map{|hash| hash[DatasetUtils::RUN_DONE]}.flatten.compact
 	if(next_config_diff.length == count)
 	  if(count > 0)
@@ -245,10 +187,15 @@ module Embulk
 	    @logger.info("Dataset files are in dataset directory: " + @data_set_directory, print=TRUE)
 	  end
 	  @dataset_utils.createCompletedFile()
-	  if count > 10
+	else
-	    next_config_diff = {}
+	  if(count > 0)
+	    puts
+	    @logger.error("Some remote files could not be downloaded. Please check the details in the log file: " + @logger.getLogPath(), print=TRUE)
+	    @logger.info("Please retry the operation for download those files.", print=TRUE)
+	    puts
 	  end
 	end
+	next_config_diff = {}
 	return {DatasetUtils::RUN_DONE => next_config_diff}
      end
@@ -296,7 +243,6 @@ module Embulk
 	else
 	  return_value = DatasetUtils::RUN_DONE
 	end
-	# update reports if operation successfully ended
 	if return_value == DatasetUtils::RUN_DONE
 	  if hash.to_s == DatasetUtils::DELETE
 	    @dataset_utils.deleteFromReport(ref, return_value)

--- a/ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/output/fif.rb
+++ b/ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/output/fif.rb
@@ -35,12 +35,9 @@ module Embulk
          page.each do |record|
 	    reference = record[0]
 	    data_chunk = Base64.decode64(record[1])
-	    data_set_directory = @output_path.end_with?("/") ? @output_path : @output_path + "/"
+	    @dataset_utils = DatasetUtils.new("")
-	    ref = reference.reverse.sub("/".reverse, ".".reverse).reverse.sub(record[2]+"/", "")
+	    data_set_directory = @dataset_utils.appendSlashTo(@output_path)
-	    if ref.end_with?(".none")
+	    file_path = @dataset_utils.referenceToPath(reference, data_set_directory, record[2])
-	      ref = ref[0...-5]
-	    end
-	    file_path = data_set_directory + ref
 	    write_mode = 'ab'
 	    if record[3] == DatasetUtils::DELETE
 	      File.delete(file_path) if File.exist?(file_path)
@@ -48,7 +45,7 @@ module Embulk
 	      if record[3] == TRUE.to_s
 	        write_mode = 'w'
 	      end
-	      dirname = File.dirname(data_set_directory + ref)
+	      dirname = File.dirname(file_path)
 	      unless File.directory?(dirname)
 	        FileUtils.mkdir_p(dirname)
 	      end

--- a/ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/output/wendelin.rb
+++ b/ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/output/wendelin.rb
@@ -46,11 +46,12 @@ module Embulk
 	  hash = record[7]
 	  begin
 	    if eof == DatasetUtils::DELETE
-	      reference = [dataset, filename, extension].join("/")
+	      reference = [dataset, filename, extension].join(DatasetUtils::REFERENCE_SEPARATOR)
 	      @wendelin.delete(reference)
 	    else
-	      reference = [supplier, dataset, filename, extension, eof, size, hash].join("/")
+	      reference = [supplier, dataset, filename, extension, eof, size, hash].join(DatasetUtils::REFERENCE_SEPARATOR)
-	      if not @wendelin.ingest(reference, data_chunk)
+	      split = eof != ""
+	      if not @wendelin.ingest(reference, data_chunk, split)
 	        raise "could not ingest"
 	      end
 	    end

--- a/ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/parser/binary.rb
+++ b/ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/parser/binary.rb
 require_relative '../filelogger'
+require_relative '../dataset_utils'
 class Index
  include Singleton
@@ -19,21 +20,20 @@ module Embulk
    class BinaryParserPlugin < ParserPlugin
      Plugin.register_parser("binary", self)
-      CHUNK_SIZE = 50
-      MEGA = 1000000
-      EOF = "EOF"
      def self.transaction(config, &control)
 	tool_dir = config.param('tool_dir', :string, default: ".")
 	@logger = LogManager.instance()
 	@logger.setFilename(tool_dir, "parser")
        task = {
-	  chunk_size: config.param('chunk_size', :float, default: CHUNK_SIZE) * MEGA,
+	  chunk_size: config.param('chunk_size', :float, default: 0) * DatasetUtils::MEGA,
 	  supplier: config.param("supplier", :string, default: "parser"),
 	  data_set: config.param("data_set", :string),
 	  input_plugin: config.param("storage", :string, default: "parser"),
 	  date: Time.now.strftime("%Y-%m-%d_%H-%M-%S")
        }
+	if task['chunk_size'] == 0
+	  task['chunk_size'] = DatasetUtils::CHUNK_SIZE
+	end
 	columns = [
            Column.new(0, "supplier", :string),
            Column.new(1, "data_set", :string),
@@ -71,7 +71,7 @@ module Embulk
      end
      private
-      def each_chunk(file, filename, chunk_size=CHUNK_SIZE)
+      def each_chunk(file, filename, chunk_size=DatasetUtils::CHUNK_SIZE)
 	extension = @index.to_s.rjust(3, "0")
 	npart = 0
        next_byte = file.read(1)
@@ -89,7 +89,7 @@ module Embulk
            data += file.read(chunk_size)
            next_byte = file.read(1)
            if not next_byte
-                eof = EOF
+                eof = DatasetUtils::EOF
 		if first
 		  # this means that the whole file will be ingested at once (not split)
 		  eof = ""

--- a/ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/wendelin_client.rb
+++ b/ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/wendelin_client.rb
@@ -23,6 +23,9 @@ class WendelinClient
    rescue Exception => e
      @logger.error("An error occurred while checking if reference exists: " + e.to_s)
      @logger.error(e.backtrace)
+      if e.to_s.include? "Unauthorized" or e.to_s.include? "401"
+	raise e
+      end
      return FALSE
    else
      return res.to_s == 'TRUE'
@@ -53,27 +56,27 @@ class WendelinClient
    end
  end
-  def ingest(reference, data_chunk)
+  def ingest(reference, data_chunk, split)
      @logger.info("Ingestion reference: #{reference}", print=TRUE)
-      if Time.new - @last_ingestion < 2
+      if split and Time.new - @last_ingestion < 3
-	# avoid send ingestions to close (specially for split ones)
+	# avoid to send split ingestions to close
-	sleep 2
+	sleep 3
      end
      if exists(reference)
-        @logger.info("There is another ingestion already done for the pair data_set-filename. Reference "\
+        @logger.info("There is another ingestion already done for the pair dataset-filename. Reference "\
              + reference, print=TRUE)
-	@logger.info("Rename your reference or delete the older ingestion.", print=TRUE)
+	@logger.info("Rename your file or download the full dataset to make local changes.", print=TRUE)
        return FALSE
      end
      if reference.include? "#" or reference.include? "+"
-	raise "Invalid chars in file name. Please rename it."
+	raise "invalid chars in file name. Please rename it."
      end
      begin
      	uri = URI("#{@erp5_url}/ingest?reference=#{reference}")
      rescue Exception => e
        @logger.error("An error occurred while generating url: " + e.to_s)
        @logger.error(e.backtrace)
-	raise "Invalid chars in file name. Please rename it."
+	raise "invalid chars in file name. Please rename it."
      end
      response = handleRequest(uri, reference, data_chunk)
      if response == FALSE
@@ -138,7 +141,7 @@ class WendelinClient
          res = Net::HTTP.start(uri.hostname, uri.port,
 	          :use_ssl      => (uri.scheme == 'https'),
 	          :verify_mode  => OpenSSL::SSL::VERIFY_NONE,
-	          :ssl_timeout  => 20, :open_timeout => 20, :read_timeout => 20,
+	          :ssl_timeout  => 300, :open_timeout => 300, :read_timeout => 300,
 	        ) do |http|
 		  http.request(req)
 		end