Commit e0f8ef54 authored by Xiaowu Zhang

remove py

parent e0e329a1
#!/usr/bin/env python3
# Developed with Python 3.8.5
import argparse
import os
import stat
import traceback
import hashlib
import io
import multiprocessing
import urllib.parse  # urllib.parse must be imported explicitly; "import urllib" alone is not enough
import time

import urllib3
from msgpack import dumps
import psutil
import posix1e  # pylibacl
import certifi


def compute_hashes(entry_path):
    # Read the file once and feed every digest from the same buffer, so the
    # data is only traversed a single time for all four algorithms.
    with open(entry_path, mode="rb") as f:
        md5 = hashlib.md5()
        sha1 = hashlib.sha1()
        sha256 = hashlib.sha256()
        sha512 = hashlib.sha512()
        while True:
            data = f.read(io.DEFAULT_BUFFER_SIZE)
            md5.update(data)
            sha1.update(data)
            sha256.update(data)
            sha512.update(data)
            # A short read means end of file.
            if len(data) < io.DEFAULT_BUFFER_SIZE:
                break
    return {"md5": md5.hexdigest(),
            "sha1": sha1.hexdigest(),
            "sha256": sha256.hexdigest(),
            "sha512": sha512.hexdigest()}


def stat_result_to_dict(stat_result):
    # Not every field exists on every platform (e.g. st_birthtime is
    # BSD/macOS only), so missing attributes are recorded as None.
    fields = ("st_mode", "st_ino", "st_dev", "st_nlink", "st_uid", "st_gid",
              "st_size", "st_atime", "st_mtime", "st_ctime", "st_blocks",
              "st_blksize", "st_rdev", "st_flags", "st_gen", "st_birthtime")
    return {field: getattr(stat_result, field, None) for field in fields}
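
# Illustrative output (values are hypothetical): a regular file on Linux
# might map to {"st_mode": 33188, "st_size": 42, ..., "st_birthtime": None};
# fields the platform does not provide come back as None rather than raising.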


def construct_fs_tree(mp_pool=None, mp_tasks=None, cur_dict=None, path="/",
                      dev_whitelist=None, ignored_dirs=()):
    is_first_call = False
    if mp_pool is None:
        is_first_call = True
        mp_pool = multiprocessing.Pool()
    if mp_tasks is None:
        mp_tasks = []
    if cur_dict is None:
        cur_dict = {"stat": stat_result_to_dict(os.stat(path, follow_symlinks=False)),
                    "childs": dict()}
    if dev_whitelist is not None:
        # Only descend into devices on the whitelist.
        path_stat = cur_dict["stat"]
        if "st_dev" in path_stat:
            if path_stat["st_dev"] not in dev_whitelist:
                return cur_dict
    for ignored in ignored_dirs:
        if path.startswith(ignored):
            cur_dict["ignored"] = True
            return cur_dict
    try:
        with os.scandir(path) as it:
            for entry in it:
                try:
                    entry_path = os.fsdecode(entry.path)
                    entry_name = os.fsdecode(entry.name)
                    try:
                        entry_stat = os.stat(entry_path, follow_symlinks=False)
                    except Exception:
                        traceback.print_exc()
                        entry_stat = None
                    cur_dict["childs"][entry_name] = {"stat": stat_result_to_dict(entry_stat),
                                                      "childs": dict()}
                    try:
                        cur_dict["childs"][entry_name]["xattrs"] = dict()
                        for k in os.listxattr(entry_path, follow_symlinks=False):
                            cur_dict["childs"][entry_name]["xattrs"][k] = os.getxattr(
                                entry_path, k, follow_symlinks=False)
                    except Exception:
                        traceback.print_exc()
                    try:
                        cur_dict["childs"][entry_name]["posix_acls"] = posix1e.ACL(
                            file=entry_path).to_any_text(options=posix1e.TEXT_ALL_EFFECTIVE)
                    except Exception:
                        traceback.print_exc()
                    if entry_stat is None:
                        continue
                    if stat.S_ISDIR(entry_stat.st_mode):
                        # Forward ignored_dirs so pruning also applies below
                        # the top level of the walk.
                        construct_fs_tree(mp_pool=mp_pool, mp_tasks=mp_tasks,
                                          cur_dict=cur_dict["childs"][entry_name],
                                          path=entry_path, dev_whitelist=dev_whitelist,
                                          ignored_dirs=ignored_dirs)
                    elif stat.S_ISREG(entry_stat.st_mode):
                        # Hash regular files in worker processes; results are
                        # merged into the tree once the pool is drained below.
                        mp_tasks.append({"result": mp_pool.apply_async(compute_hashes, [entry_path]),
                                         "merge_into": cur_dict["childs"][entry_name]})
                    elif stat.S_ISLNK(entry_stat.st_mode):
                        cur_dict["childs"][entry_name]["symlink_target"] = os.readlink(
                            entry_path)
                except Exception:
                    traceback.print_exc()
    except Exception:
        traceback.print_exc()
    if is_first_call:
        mp_pool.close()
        for task in mp_tasks:
            try:
                result = task["result"].get()
                for k, v in result.items():
                    task["merge_into"][k] = v
            except Exception:
                traceback.print_exc()
        mp_pool.join()
    return cur_dict
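
# Usage sketch (hypothetical arguments): walk /home while staying on its own
# device and pruning one subtree; the returned dict nests entries under
# "childs" keyed by file name.
#   root_dev = os.stat("/home", follow_symlinks=False).st_dev
#   tree = construct_fs_tree(path="/home", dev_whitelist=[root_dev],
#                            ignored_dirs=["/home/lost+found"])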


def upload_to_erp5(fileobj, size, base_url, username, password, reference):
    pool = urllib3.PoolManager(cert_reqs='CERT_REQUIRED',
                               ca_certs=certifi.where())
    url = '{0}/portal_contributions/newContent'.format(base_url)
    headers = urllib3.make_headers(
        keep_alive=True, basic_auth='{0}:{1}'.format(username, password))
    resp = pool.request('GET', url, fields={'portal_type': 'File',
                                            'filename': '{0}.metadata'.format(reference),
                                            'container_path': 'document_module',
                                            'data': ''},
                        headers=headers,
                        redirect=False)
    # workaround for ERP5 disappearing documents race condition bug
    time.sleep(3)
    upload_path = resp.headers['X-Location']
    data = fileobj.read(io.DEFAULT_BUFFER_SIZE)
    offset = 0
    # Copy the headers so Content-Range does not leak into later requests.
    upload_headers = dict(headers)
    while data:
        # Content-Range uses an inclusive end byte (RFC 7233).
        upload_headers['Content-Range'] = 'bytes {0}-{1}/{2}'.format(
            offset, offset + len(data) - 1, size)
        pool.urlopen('PUT', upload_path, headers=upload_headers, body=data)
        offset += len(data)
        data = fileobj.read(io.DEFAULT_BUFFER_SIZE)
    parsed = urllib.parse.urlparse(upload_path)
    resp = pool.request('POST', upload_path, fields={
                            'form_id': 'File_view',
                            'object_path': parsed.path,
                            'field_my_reference': reference,
                            'Base_edit:method': '',
                        },
                        redirect=False,
                        headers=headers)
    # workaround for ERP5 disappearing documents race condition bug
    time.sleep(3)
    resp = pool.request('POST', upload_path, fields={
                            'dialog_id': 'Base_viewWorkflowActionDialog',
                            'dialog_method': 'Workflow_statusModify',
                            'form_id': 'File_view',
                            'object_path': parsed.path,
                            'field_your_comment': '',
                            'field_your_workflow_action': 'share_action',
                            'Base_callDialogMethod:method': '',
                        },
                        redirect=False,
                        headers=headers)
    # workaround for ERP5 disappearing documents race condition bug
    time.sleep(3)
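
# Usage sketch (hypothetical URL and credentials): the payload is streamed in
# io.DEFAULT_BUFFER_SIZE chunks via ranged PUTs, then the document is given
# its reference and shared through the two follow-up POSTs above.
#   payload = dumps({"example": True})
#   upload_to_erp5(io.BytesIO(payload), len(payload),
#                  "https://erp5.example.com", "user", "secret", "host-1234")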


def main():
    parser = argparse.ArgumentParser(
        description="Collect and report metadata about a system")
    # nargs="?" makes the positional optional so its default is actually used.
    parser.add_argument("start_directory", type=str, nargs="?", default="/")
    parser.add_argument("--ignored-dirs", type=str, nargs="+", default=[])
    parser.add_argument("--erp5-user", type=str)
    parser.add_argument("--erp5-pass", type=str)
    parser.add_argument("--erp5-file-reference", type=str)
    parser.add_argument("--erp5-base-url", type=str)
    args = parser.parse_args()
    # Whitelist the devices backing real (non-virtual) partitions so the walk
    # does not descend into /proc, /sys and other pseudo-filesystems.
    parts = psutil.disk_partitions(all=False)
    dev_whitelist = list()
    for part in parts:
        dev_whitelist.append(
            os.stat(part.mountpoint, follow_symlinks=False).st_dev)
    tree = construct_fs_tree(path=args.start_directory,
                             dev_whitelist=dev_whitelist, ignored_dirs=args.ignored_dirs)
    final = {'disk_partitions': parts, 'fs_tree': tree}
    packed = dumps(final)
    upload_to_erp5(io.BytesIO(packed), len(packed), args.erp5_base_url,
                   args.erp5_user, args.erp5_pass, args.erp5_file_reference)


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
from setuptools import setup, find_packages

setup(
    name="metadata-collect-agent",
    version="0.1",
    packages=find_packages(),
    scripts=["main.py"],
    install_requires=["pylibacl>=0.5.4",
                      "msgpack>=1.0.0",
                      "urllib3>=1.25.10",
                      "psutil>=5.7.2",
                      "certifi"],  # certifi is imported by main.py for the CA bundle
    author="Nexedi SA",
    author_email="leo.le.bouter@nexedi.com",
)