Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
E
ebulk
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
ebulk
Commits
f7de6621
Commit
f7de6621
authored
Oct 03, 2018
by
roqueporchetto@gmail.com
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
new commands for staging and reset
parent
963e3e0d
Changes
8
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
590 additions
and
343 deletions
+590
-343
ebulk
ebulk
+151
-69
ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/dataset_utils.rb
.../embulk-wendelin-dataset-tool/lib/embulk/dataset_utils.rb
+249
-27
ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/input/fif.rb
...data/embulk-wendelin-dataset-tool/lib/embulk/input/fif.rb
+103
-107
ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/input/wendelin.rb
...embulk-wendelin-dataset-tool/lib/embulk/input/wendelin.rb
+60
-114
ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/output/fif.rb
...ata/embulk-wendelin-dataset-tool/lib/embulk/output/fif.rb
+4
-7
ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/output/wendelin.rb
...mbulk-wendelin-dataset-tool/lib/embulk/output/wendelin.rb
+4
-3
ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/parser/binary.rb
.../embulk-wendelin-dataset-tool/lib/embulk/parser/binary.rb
+7
-7
ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/wendelin_client.rb
...mbulk-wendelin-dataset-tool/lib/embulk/wendelin_client.rb
+12
-9
No files found.
ebulk
View file @
f7de6621
This diff is collapsed.
Click to expand it.
ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/dataset_utils.rb
View file @
f7de6621
This diff is collapsed.
Click to expand it.
ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/input/fif.rb
View file @
f7de6621
This diff is collapsed.
Click to expand it.
ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/input/wendelin.rb
View file @
f7de6621
This diff is collapsed.
Click to expand it.
ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/output/fif.rb
View file @
f7de6621
...
...
@@ -35,12 +35,9 @@ module Embulk
page
.
each
do
|
record
|
reference
=
record
[
0
]
data_chunk
=
Base64
.
decode64
(
record
[
1
])
data_set_directory
=
@output_path
.
end_with?
(
"/"
)
?
@output_path
:
@output_path
+
"/"
ref
=
reference
.
reverse
.
sub
(
"/"
.
reverse
,
"."
.
reverse
).
reverse
.
sub
(
record
[
2
]
+
"/"
,
""
)
if
ref
.
end_with?
(
".none"
)
ref
=
ref
[
0
...-
5
]
end
file_path
=
data_set_directory
+
ref
@dataset_utils
=
DatasetUtils
.
new
(
""
)
data_set_directory
=
@dataset_utils
.
appendSlashTo
(
@output_path
)
file_path
=
@dataset_utils
.
referenceToPath
(
reference
,
data_set_directory
,
record
[
2
])
write_mode
=
'ab'
if
record
[
3
]
==
DatasetUtils
::
DELETE
File
.
delete
(
file_path
)
if
File
.
exist?
(
file_path
)
...
...
@@ -48,7 +45,7 @@ module Embulk
if
record
[
3
]
==
TRUE
.
to_s
write_mode
=
'w'
end
dirname
=
File
.
dirname
(
data_set_directory
+
ref
)
dirname
=
File
.
dirname
(
file_path
)
unless
File
.
directory?
(
dirname
)
FileUtils
.
mkdir_p
(
dirname
)
end
...
...
ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/output/wendelin.rb
View file @
f7de6621
...
...
@@ -46,11 +46,12 @@ module Embulk
hash
=
record
[
7
]
begin
if
eof
==
DatasetUtils
::
DELETE
reference
=
[
dataset
,
filename
,
extension
].
join
(
"/"
)
reference
=
[
dataset
,
filename
,
extension
].
join
(
DatasetUtils
::
REFERENCE_SEPARATOR
)
@wendelin
.
delete
(
reference
)
else
reference
=
[
supplier
,
dataset
,
filename
,
extension
,
eof
,
size
,
hash
].
join
(
"/"
)
if
not
@wendelin
.
ingest
(
reference
,
data_chunk
)
reference
=
[
supplier
,
dataset
,
filename
,
extension
,
eof
,
size
,
hash
].
join
(
DatasetUtils
::
REFERENCE_SEPARATOR
)
split
=
eof
!=
""
if
not
@wendelin
.
ingest
(
reference
,
data_chunk
,
split
)
raise
"could not ingest"
end
end
...
...
ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/parser/binary.rb
View file @
f7de6621
require_relative
'../filelogger'
require_relative
'../dataset_utils'
class
Index
include
Singleton
...
...
@@ -19,21 +20,20 @@ module Embulk
class
BinaryParserPlugin
<
ParserPlugin
Plugin
.
register_parser
(
"binary"
,
self
)
CHUNK_SIZE
=
50
MEGA
=
1000000
EOF
=
"EOF"
def
self
.
transaction
(
config
,
&
control
)
tool_dir
=
config
.
param
(
'tool_dir'
,
:string
,
default:
"."
)
@logger
=
LogManager
.
instance
()
@logger
.
setFilename
(
tool_dir
,
"parser"
)
task
=
{
chunk_size:
config
.
param
(
'chunk_size'
,
:float
,
default:
CHUNK_SIZE
)
*
MEGA
,
chunk_size:
config
.
param
(
'chunk_size'
,
:float
,
default:
0
)
*
DatasetUtils
::
MEGA
,
supplier:
config
.
param
(
"supplier"
,
:string
,
default:
"parser"
),
data_set:
config
.
param
(
"data_set"
,
:string
),
input_plugin:
config
.
param
(
"storage"
,
:string
,
default:
"parser"
),
date:
Time
.
now
.
strftime
(
"%Y-%m-%d_%H-%M-%S"
)
}
if
task
[
'chunk_size'
]
==
0
task
[
'chunk_size'
]
=
DatasetUtils
::
CHUNK_SIZE
end
columns
=
[
Column
.
new
(
0
,
"supplier"
,
:string
),
Column
.
new
(
1
,
"data_set"
,
:string
),
...
...
@@ -71,7 +71,7 @@ module Embulk
end
private
def
each_chunk
(
file
,
filename
,
chunk_size
=
CHUNK_SIZE
)
def
each_chunk
(
file
,
filename
,
chunk_size
=
DatasetUtils
::
CHUNK_SIZE
)
extension
=
@index
.
to_s
.
rjust
(
3
,
"0"
)
npart
=
0
next_byte
=
file
.
read
(
1
)
...
...
@@ -89,7 +89,7 @@ module Embulk
data
+=
file
.
read
(
chunk_size
)
next_byte
=
file
.
read
(
1
)
if
not
next_byte
eof
=
EOF
eof
=
DatasetUtils
::
EOF
if
first
# this means that the whole file will be ingested at once (not split)
eof
=
""
...
...
ebulk-data/embulk-wendelin-dataset-tool/lib/embulk/wendelin_client.rb
View file @
f7de6621
...
...
@@ -23,6 +23,9 @@ class WendelinClient
rescue
Exception
=>
e
@logger
.
error
(
"An error occurred while checking if reference exists: "
+
e
.
to_s
)
@logger
.
error
(
e
.
backtrace
)
if
e
.
to_s
.
include?
"Unauthorized"
or
e
.
to_s
.
include?
"401"
raise
e
end
return
FALSE
else
return
res
.
to_s
==
'TRUE'
...
...
@@ -53,27 +56,27 @@ class WendelinClient
end
end
def
ingest
(
reference
,
data_chunk
)
def
ingest
(
reference
,
data_chunk
,
split
)
@logger
.
info
(
"Ingestion reference:
#{
reference
}
"
,
print
=
TRUE
)
if
Time
.
new
-
@last_ingestion
<
2
# avoid
send ingestions to close (specially for split ones)
sleep
2
if
split
and
Time
.
new
-
@last_ingestion
<
3
# avoid
sending split ingestions too close together
sleep
3
end
if
exists
(
reference
)
@logger
.
info
(
"There is another ingestion already done for the pair data
_
set-filename. Reference "
\
@logger
.
info
(
"There is another ingestion already done for the pair dataset-filename. Reference "
\
+
reference
,
print
=
TRUE
)
@logger
.
info
(
"Rename your
reference or delete the older ingestion
."
,
print
=
TRUE
)
@logger
.
info
(
"Rename your
file or download the full dataset to make local changes
."
,
print
=
TRUE
)
return
FALSE
end
if
reference
.
include?
"#"
or
reference
.
include?
"+"
raise
"
I
nvalid chars in file name. Please rename it."
raise
"
i
nvalid chars in file name. Please rename it."
end
begin
uri
=
URI
(
"
#{
@erp5_url
}
/ingest?reference=
#{
reference
}
"
)
rescue
Exception
=>
e
@logger
.
error
(
"An error occurred while generating url: "
+
e
.
to_s
)
@logger
.
error
(
e
.
backtrace
)
raise
"
I
nvalid chars in file name. Please rename it."
raise
"
i
nvalid chars in file name. Please rename it."
end
response
=
handleRequest
(
uri
,
reference
,
data_chunk
)
if
response
==
FALSE
...
...
@@ -138,7 +141,7 @@ class WendelinClient
res
=
Net
::
HTTP
.
start
(
uri
.
hostname
,
uri
.
port
,
:use_ssl
=>
(
uri
.
scheme
==
'https'
),
:verify_mode
=>
OpenSSL
::
SSL
::
VERIFY_NONE
,
:ssl_timeout
=>
20
,
:open_timeout
=>
20
,
:read_timeout
=>
2
0
,
:ssl_timeout
=>
300
,
:open_timeout
=>
300
,
:read_timeout
=>
30
0
,
)
do
|
http
|
http
.
request
(
req
)
end
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment