Commit e6bea615 authored by Roque

Allow ebulk to run in "always yes" mode

See merge request !2
parents 452e2f8d 04090942
...@@ -44,6 +44,7 @@ UPDATE="U" ...@@ -44,6 +44,7 @@ UPDATE="U"
RESUME="R" RESUME="R"
DOWNLOAD="D" DOWNLOAD="D"
ASK="A" ASK="A"
ALWAYS_YES="false"
# load data lake url from file if exists # load data lake url from file if exists
if [ -f "$DATA_LAKE_URL_FILE" ]; then if [ -f "$DATA_LAKE_URL_FILE" ]; then
...@@ -61,6 +62,7 @@ function helpReadme { ...@@ -61,6 +62,7 @@ function helpReadme {
} }
function checkParameters { function checkParameters {
re='^[_A-Za-z.0-9-]*$'
if [ ! -f $TEMPLATE_FILE ]; then if [ ! -f $TEMPLATE_FILE ]; then
echo echo
echo -e "${ORANGE}[ERROR] File '$TEMPLATE_FILE' not found!${NC}" >&2; return 1 echo -e "${ORANGE}[ERROR] File '$TEMPLATE_FILE' not found!${NC}" >&2; return 1
...@@ -90,6 +92,19 @@ function checkParameters { ...@@ -90,6 +92,19 @@ function checkParameters {
fi fi
fi fi
EBULK_DATASET_FILE="$DATASET_DIR$EBULK_DATASET_FILE_NAME" EBULK_DATASET_FILE="$DATASET_DIR$EBULK_DATASET_FILE_NAME"
if ! [[ $REFERENCE =~ $re ]] ; then
if [ "$REFERENCE" = "." ] && [[ -z "$STORAGE" ]] ; then
echo
echo -e "${ORANGE}[ERROR] You are not in a dataset directory ${GREEN}'$REFERENCE'${ORANGE}.${NC}"
echo
else
echo
echo -e "${ORANGE}[ERROR] Error in argument: invalid dataset name ${GREEN}'$REFERENCE'${ORANGE}.${NC}"
echo -e "${ORANGE}[ERROR] Only alphanumerics, dots ( . ), underscores ( _ ) and hyphens ( - ) are allowed.${NC}"
echo
fi
helpReadme >&2; return 1
fi
if [[ $DATASET_DIR != $REFERENCE ]]; then if [[ $DATASET_DIR != $REFERENCE ]]; then
if [ "$REFERENCE" = "." ] ; then if [ "$REFERENCE" = "." ] ; then
REFERENCE=$(basename "$DATASET_DIR") REFERENCE=$(basename "$DATASET_DIR")
...@@ -123,7 +138,6 @@ function checkParameters { ...@@ -123,7 +138,6 @@ function checkParameters {
if [ "$DATA_SET" == "." ] ; then if [ "$DATA_SET" == "." ] ; then
DATA_SET=$(basename $(pwd)) DATA_SET=$(basename $(pwd))
fi fi
re='^[_A-Za-z.0-9-]*$'
if ! [[ $DATA_SET =~ $re ]] ; then if ! [[ $DATA_SET =~ $re ]] ; then
if [ "$DATA_SET" = "." ] && [[ -z "$STORAGE" ]] ; then if [ "$DATA_SET" = "." ] && [[ -z "$STORAGE" ]] ; then
echo echo
...@@ -280,6 +294,7 @@ function updateConfigFile { ...@@ -280,6 +294,7 @@ function updateConfigFile {
ING_URL=\"$ING_URL\" ING_URL=\"$ING_URL\"
STORAGE=\"$STORAGE\" STORAGE=\"$STORAGE\"
STATUS=\"$STATUS\" STATUS=\"$STATUS\"
ALWAYS_YES=\"$ALWAYS_YES\"
S3_BUCKET=\"$S3_BUCKET\" S3_BUCKET=\"$S3_BUCKET\"
S3_PREFIX=\"$S3_PREFIX\" S3_PREFIX=\"$S3_PREFIX\"
...@@ -603,6 +618,8 @@ while [ "$1" != "" ]; do ...@@ -603,6 +618,8 @@ while [ "$1" != "" ]; do
-d | --directory ) shift -d | --directory ) shift
DATASET_DIR=$1 DATASET_DIR=$1
;; ;;
-y | --yes ) ALWAYS_YES="true"
;;
-s | --storage ) shift -s | --storage ) shift
STORAGE=$1 STORAGE=$1
;; ;;
...@@ -746,6 +763,9 @@ case $OPERATION in ...@@ -746,6 +763,9 @@ case $OPERATION in
echo echo
fi fi
echo "### DATASET DOWNLOAD ###" echo "### DATASET DOWNLOAD ###"
if [ "$ALWAYS_YES" = "true" ] ; then
echo "[always yes mode]"
fi
echo echo
if [ "$DISCARD_CHANGES" != "" ] ; then if [ "$DISCARD_CHANGES" != "" ] ; then
DISCARD_CHANGES_FILE="$DATASET_DIR$DISCARD_CHANGES_FILE_NAME" DISCARD_CHANGES_FILE="$DATASET_DIR$DISCARD_CHANGES_FILE_NAME"
...@@ -755,8 +775,10 @@ case $OPERATION in ...@@ -755,8 +775,10 @@ case $OPERATION in
echo -e "** The dataset will be downloaded in the specified directory: $DATASET_DIR" echo -e "** The dataset will be downloaded in the specified directory: $DATASET_DIR"
fi fi
echo echo
if [ "$ALWAYS_YES" = "false" ] ; then
read -n 1 -s -r -p "Press any key to continue" read -n 1 -s -r -p "Press any key to continue"
echo echo
fi
runProcess runProcess
;; ;;
push) push)
...@@ -795,11 +817,16 @@ case $OPERATION in ...@@ -795,11 +817,16 @@ case $OPERATION in
esac esac
fi fi
echo "### DATASET INGESTION ###" echo "### DATASET INGESTION ###"
if [ "$ALWAYS_YES" = "true" ] ; then
echo "[always yes mode]"
else
echo echo
echo -e "** The tool will look for dataset files in the specified $MESSAGE" echo -e "** The tool will look for dataset files in the specified $MESSAGE"
echo -e "** Please make sure to put your dataset files there for ingestion." echo -e "** Please make sure to put your dataset files there for ingestion."
echo echo
read -n 1 -s -r -p "Press any key to continue" read -n 1 -s -r -p "Press any key to continue"
fi
echo echo
runProcess runProcess
;; ;;
......
...@@ -8,6 +8,7 @@ in: ...@@ -8,6 +8,7 @@ in:
chunk_size: $CHUNK chunk_size: $CHUNK
output_path: $DATASET_DIR output_path: $DATASET_DIR
tool_dir: $TOOL_DIR tool_dir: $TOOL_DIR
always_yes: $ALWAYS_YES
out: out:
type: fif type: fif
......
...@@ -10,6 +10,7 @@ in: ...@@ -10,6 +10,7 @@ in:
erp5_url: $DOWN_URL erp5_url: $DOWN_URL
tool_dir: $TOOL_DIR tool_dir: $TOOL_DIR
status: $STATUS status: $STATUS
always_yes: $ALWAYS_YES
out: out:
type: wendelin type: wendelin
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
# PLEASE FILL THE 'IN' SECTION ACCORDING TO YOUR PLUGIN # PLEASE FILL THE 'IN' SECTION ACCORDING TO YOUR PLUGIN
in: in:
always_yes: $ALWAYS_YES
# FOR EXAMPLE CSV FILES # FOR EXAMPLE CSV FILES
# type: file # type: file
......
...@@ -8,6 +8,7 @@ in: ...@@ -8,6 +8,7 @@ in:
user: $FTP_USER user: $FTP_USER
password: $FTP_PASSWORD password: $FTP_PASSWORD
path_prefix: $FTP_PATH path_prefix: $FTP_PATH
always_yes: $ALWAYS_YES
#ssl_verify: false #ssl_verify: false
#port: 21 #port: 21
......
...@@ -6,6 +6,7 @@ in: ...@@ -6,6 +6,7 @@ in:
type: http type: http
url: $HTTP_URL url: $HTTP_URL
method: $HTTP_METHOD method: $HTTP_METHOD
always_yes: $ALWAYS_YES
# basic_auth: # basic_auth:
# user: MyUser # user: MyUser
# password: MyPassword # password: MyPassword
......
...@@ -9,6 +9,7 @@ in: ...@@ -9,6 +9,7 @@ in:
access_key_id: $S3_ACCESS_KEY access_key_id: $S3_ACCESS_KEY
secret_access_key: $S3_SECRET_KEY secret_access_key: $S3_SECRET_KEY
auth_method: $S3_AUTH_METHOD auth_method: $S3_AUTH_METHOD
always_yes: $ALWAYS_YES
# endpoint: # endpoint:
# region: # region:
# path_match_pattern: # path_match_pattern:
......
...@@ -60,11 +60,15 @@ module Embulk ...@@ -60,11 +60,15 @@ module Embulk
if not conflicts.empty? if not conflicts.empty?
puts puts
@logger.warn("Some of your local files already exist in remote dataset.", print=TRUE) @logger.warn("Some of your local files already exist in remote dataset.", print=TRUE)
if task['always_yes_mode'] != "true"
@logger.warn("You may want to cancel this partial ingestion and download the full dataset to make local changes.", print=TRUE) @logger.warn("You may want to cancel this partial ingestion and download the full dataset to make local changes.", print=TRUE)
end
puts puts
@logger.warn("Current ingestion WILL OVERWRITE the following files in remote dataset:", print=TRUE) @logger.warn("Current ingestion WILL OVERWRITE the following files in remote dataset:", print=TRUE)
if task['always_yes_mode'] != "true"
puts "** press key **" puts "** press key **"
option = gets option = gets
end
print_short = conflicts.length > 500 print_short = conflicts.length > 500
@dataset_utils.showChangesList(conflicts, "", print_short, status=DatasetUtils::OVERWRITE) @dataset_utils.showChangesList(conflicts, "", print_short, status=DatasetUtils::OVERWRITE)
else else
...@@ -85,6 +89,7 @@ module Embulk ...@@ -85,6 +89,7 @@ module Embulk
if task['chunk_size'] == 0 if task['chunk_size'] == 0
task['chunk_size'] = DatasetUtils::CHUNK_SIZE task['chunk_size'] = DatasetUtils::CHUNK_SIZE
end end
task['always_yes_mode'] = config.param('always_yes', :string)
@data_set = task['data_set'] @data_set = task['data_set']
@dataset_utils = DatasetUtils.new("") @dataset_utils = DatasetUtils.new("")
paths = config.param('path_prefix', :array) paths = config.param('path_prefix', :array)
...@@ -133,7 +138,7 @@ module Embulk ...@@ -133,7 +138,7 @@ module Embulk
else else
if not @dataset_utils.partialIngestionFileExist() if not @dataset_utils.partialIngestionFileExist()
@logger.info("Checking local dataset...", print=TRUE) @logger.info("Checking local dataset...", print=TRUE)
if not @dataset_utils.reportUpToDate(data_stream_dict, @data_set) if not @dataset_utils.reportUpToDate(data_stream_dict, @data_set) and task['always_yes_mode'] != "true"
puts puts
@logger.error("Your current dataset is outdated. Please, run a download to update it before ingest your changes.", print=TRUE) @logger.error("Your current dataset is outdated. Please, run a download to update it before ingest your changes.", print=TRUE)
puts puts
...@@ -153,6 +158,7 @@ module Embulk ...@@ -153,6 +158,7 @@ module Embulk
end end
self.status(task, push=TRUE) self.status(task, push=TRUE)
if task['always_yes_mode'] != "true"
@logger.info("Continue with ingestion? (y/n)", print=TRUE) @logger.info("Continue with ingestion? (y/n)", print=TRUE)
option = gets option = gets
option = option.chomp option = option.chomp
...@@ -160,6 +166,7 @@ module Embulk ...@@ -160,6 +166,7 @@ module Embulk
@logger.info("Ingestion cancelled by user.", print=TRUE) @logger.info("Ingestion cancelled by user.", print=TRUE)
@logger.abortExecution() @logger.abortExecution()
end end
end
if not @dataset_utils.reportFileExist() if not @dataset_utils.reportFileExist()
@dataset_utils.createReportFile() @dataset_utils.createReportFile()
end end
......
...@@ -31,6 +31,14 @@ module Embulk ...@@ -31,6 +31,14 @@ module Embulk
end end
end end
# Non-interactive ("always yes") preparation step for a download.
# Invokes, in this exact order, the split-operation control file cleanup,
# the split-operation file cleanup, the completed-file cleanup and the
# report-file creation on @dataset_utils, then logs that the download
# will overwrite the previous local dataset.
def self.alwaysYesDownload()
  preparation_steps = [
    :deleteSplitOperationControlFile,
    :deleteSplitOperationFile,
    :deleteCompletedFile,
    :createReportFile
  ]
  # Each step is dispatched with no arguments, one after another.
  preparation_steps.each { |step| @dataset_utils.public_send(step) }
  notice = "[always yes mode] Download operation will overwrite previous local dataset."
  # Second positional argument is the logger's print flag.
  @logger.info(notice, TRUE)
end
def self.askUserForAction(task, action, show_message) def self.askUserForAction(task, action, show_message)
option = @dataset_utils.getConfiguration(action, task['tool_dir']) option = @dataset_utils.getConfiguration(action, task['tool_dir'])
valid_option = option != DatasetUtils::OPTION_ABORT ? TRUE : FALSE valid_option = option != DatasetUtils::OPTION_ABORT ? TRUE : FALSE
...@@ -104,7 +112,8 @@ module Embulk ...@@ -104,7 +112,8 @@ module Embulk
'data_set' => @data_set, 'data_set' => @data_set,
'chunk_size' => DatasetUtils::CHUNK_SIZE + 10, 'chunk_size' => DatasetUtils::CHUNK_SIZE + 10,
'output_path' => @output_path, 'output_path' => @output_path,
'tool_dir' => @tool_dir 'tool_dir' => @tool_dir,
'always_yes_mode' => config.param('always_yes', :string)
} }
if task['chunk_size'] == 0 if task['chunk_size'] == 0
task['chunk_size'] = DatasetUtils::CHUNK_SIZE task['chunk_size'] = DatasetUtils::CHUNK_SIZE
...@@ -158,10 +167,18 @@ module Embulk ...@@ -158,10 +167,18 @@ module Embulk
task['data_streams'] = @dataset_utils.getRemoteFileListForDiscardLocalChanges(task['data_streams'], @data_set, task['data_streams'] = @dataset_utils.getRemoteFileListForDiscardLocalChanges(task['data_streams'], @data_set,
check_changes=FALSE, changes=local_changes) check_changes=FALSE, changes=local_changes)
else else
if task['always_yes_mode'] != "true"
self.askUserForAction(task, action=DatasetUtils::OPTION_UPDATE, show_message=TRUE) self.askUserForAction(task, action=DatasetUtils::OPTION_UPDATE, show_message=TRUE)
else
self.alwaysYesDownload()
end
end end
elsif not @dataset_utils.partialIngestionFileExist() elsif not @dataset_utils.partialIngestionFileExist()
if task['always_yes_mode'] != "true"
self.askUserForAction(task, action=DatasetUtils::OPTION_RESUME, show_message=TRUE) self.askUserForAction(task, action=DatasetUtils::OPTION_RESUME, show_message=TRUE)
else
self.alwaysYesDownload()
end
else else
if @dataset_utils.discardChangesFileExist() if @dataset_utils.discardChangesFileExist()
puts puts
......
...@@ -39,5 +39,6 @@ options: ...@@ -39,5 +39,6 @@ options:
-s, --storage <storage> Uses the selected input storage from this set: [http, ftp, s3] -s, --storage <storage> Uses the selected input storage from this set: [http, ftp, s3]
-cs, --custom-storage Allows user to set a new input storage -cs, --custom-storage Allows user to set a new input storage
-a, --advanced Allows to edit the Embulk configuration file of the input storage -a, --advanced Allows to edit the Embulk configuration file of the input storage
-y, --yes Enables "always yes" mode and ebulk is run assuming yes to all user inputs required
-dc, --discard-changes Discards local changes by checking the remote dataset -dc, --discard-changes Discards local changes by checking the remote dataset
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment