Commit 65220674 authored by mouadh

mem_bech

parent be5fedf0
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
from ..tools.mem_bench import memory_usage
from ..tools.connection import MyDB from ..tools.connection import MyDB
import pandas.io.sql as psql import pandas.io.sql as psql
import os import os
...@@ -18,14 +19,17 @@ def _load_table_config_file(executer_instance, cube_obj): ...@@ -18,14 +19,17 @@ def _load_table_config_file(executer_instance, cube_obj):
db = MyDB(db=executer_instance.cube) db = MyDB(db=executer_instance.cube)
memory_usage("1 - before executing query //// _load_table_config_file")
for table in cube_obj.dimensions: for table in cube_obj.dimensions:
value = psql.read_sql_query("SELECT * FROM {0}".format(table.name), with db.engine as connection:
db.engine) value = psql.read_sql_query("SELECT * FROM {0}".format(table.name),
connection)
tables[table.name] = value[[ tables[table.name] = value[[
col for col in value.columns if col.lower()[-3:] != '_id' col for col in value.columns if col.lower()[-3:] != '_id'
]] ]]
memory_usage("2 - after query, before fetchall /////// _load_table_config_file")
# update table display name # update table display name
for dimension in cube_obj.dimensions: for dimension in cube_obj.dimensions:
if dimension.displayName and dimension.name and dimension.displayName != dimension.name: if dimension.displayName and dimension.name and dimension.displayName != dimension.name:
...@@ -47,17 +51,22 @@ def _construct_star_schema_config_file(executer_instance, cubes_obj): ...@@ -47,17 +51,22 @@ def _construct_star_schema_config_file(executer_instance, cubes_obj):
executer_instance.facts = cubes_obj.facts[0].table_name executer_instance.facts = cubes_obj.facts[0].table_name
db = MyDB(db=executer_instance.cube) db = MyDB(db=executer_instance.cube)
# load facts table # load facts table
fusion = psql.read_sql_query(
"SELECT * FROM {0}".format(executer_instance.facts), db.engine)
for fact_key, dimension_and_key in cubes_obj.facts[0].keys.items(): memory_usage("1 - before executing query //// _construct_star_schema_config_file")
df = psql.read_sql_query( with db.engine as connection:
"SELECT * FROM {0}".format(dimension_and_key.split('.')[0]), fusion = psql.read_sql_query(
db.connection) "SELECT * FROM {0}".format(executer_instance.facts), connection)
fusion = fusion.merge( for fact_key, dimension_and_key in cubes_obj.facts[0].keys.items():
df, left_on=fact_key, right_on=dimension_and_key.split('.')[1]) df = psql.read_sql_query(
"SELECT * FROM {0}".format(dimension_and_key.split('.')[0]),
connection)
fusion = fusion.merge(
df, left_on=fact_key, right_on=dimension_and_key.split('.')[1])
memory_usage("2 - after query, before fetchall /////// _construct_star_schema_config_file")
# TODO CHOSE BETWEEN THOSES DF # TODO CHOSE BETWEEN THOSES DF
# if separated dimensions # if separated dimensions
# fusion = fusion.merge(df, left_on=fact_key,right_on=dimension_and_key.split('.')[1]) # fusion = fusion.merge(df, left_on=fact_key,right_on=dimension_and_key.split('.')[1])
...@@ -90,10 +99,16 @@ def _construct_web_star_schema_config_file(executer_instance, cubes_obj): ...@@ -90,10 +99,16 @@ def _construct_web_star_schema_config_file(executer_instance, cubes_obj):
if cubes_obj.facts[0].columns: if cubes_obj.facts[0].columns:
all_columns += cubes_obj.facts[0].columns all_columns += cubes_obj.facts[0].columns
memory_usage("1 - before executing query //// 1111 _construct_web_star_schema_config_file ")
fusion = psql.read_sql_query( fusion = psql.read_sql_query(
"SELECT * FROM {0}".format(executer_instance.facts), db.engine) "SELECT * FROM {0}".format(executer_instance.facts), db.engine)
memory_usage("2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file")
tables = {} tables = {}
memory_usage("1 - before executing query //// 3333333333 _construct_web_star_schema_config_file ")
for table in cubes_obj.tables: for table in cubes_obj.tables:
tab = psql.read_sql_query("SELECT * FROM {0}".format(table.name), tab = psql.read_sql_query("SELECT * FROM {0}".format(table.name),
...@@ -118,11 +133,14 @@ def _construct_web_star_schema_config_file(executer_instance, cubes_obj): ...@@ -118,11 +133,14 @@ def _construct_web_star_schema_config_file(executer_instance, cubes_obj):
all_columns += list(tab.columns) all_columns += list(tab.columns)
tables.update({table.name: tab}) tables.update({table.name: tab})
memory_usage("2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file")
# measures in config-file only # measures in config-file only
if cubes_obj.facts[0].measures: if cubes_obj.facts[0].measures:
executer_instance.measures = cubes_obj.facts[0].measures executer_instance.measures = cubes_obj.facts[0].measures
all_columns += cubes_obj.facts[0].measures all_columns += cubes_obj.facts[0].measures
memory_usage("1 - before executing query //// 55555555 _construct_web_star_schema_config_file ")
for fact_key, dimension_and_key in cubes_obj.facts[0].keys.items(): for fact_key, dimension_and_key in cubes_obj.facts[0].keys.items():
dimension_name = dimension_and_key.split('.')[0] dimension_name = dimension_and_key.split('.')[0]
if dimension_name in tables.keys(): if dimension_name in tables.keys():
...@@ -138,5 +156,7 @@ def _construct_web_star_schema_config_file(executer_instance, cubes_obj): ...@@ -138,5 +156,7 @@ def _construct_web_star_schema_config_file(executer_instance, cubes_obj):
# remove suffixe from dimension and keep the same column name for facts # remove suffixe from dimension and keep the same column name for facts
suffixes=('', '_y')) suffixes=('', '_y'))
memory_usage("2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file")
return fusion[[column for column in all_columns if 'id' != column[-2:]]] return fusion[[column for column in all_columns if 'id' != column[-2:]]]
...@@ -2,6 +2,7 @@ from __future__ import absolute_import, division, print_function ...@@ -2,6 +2,7 @@ from __future__ import absolute_import, division, print_function
from sqlalchemy import inspect from sqlalchemy import inspect
from ..tools.mem_bench import memory_usage
from ..tools.connection import MyDB from ..tools.connection import MyDB
import pandas.io.sql as psql import pandas.io.sql as psql
...@@ -16,6 +17,7 @@ def _load_tables_db(executer_instance): ...@@ -16,6 +17,7 @@ def _load_tables_db(executer_instance):
db = MyDB(db_config_file_path=executer_instance.DATA_FOLDER,db=executer_instance.cube) db = MyDB(db_config_file_path=executer_instance.DATA_FOLDER,db=executer_instance.cube)
inspector = inspect(db.engine) inspector = inspect(db.engine)
memory_usage("1 - before executing query //// _load_tables_db")
for table_name in inspector.get_table_names(): for table_name in inspector.get_table_names():
value = psql.read_sql_query( value = psql.read_sql_query(
'SELECT * FROM "{0}"'.format(table_name), db.engine) 'SELECT * FROM "{0}"'.format(table_name), db.engine)
...@@ -23,6 +25,7 @@ def _load_tables_db(executer_instance): ...@@ -23,6 +25,7 @@ def _load_tables_db(executer_instance):
tables[table_name] = value[[ tables[table_name] = value[[
col for col in value.columns if col.lower()[-3:] != '_id' col for col in value.columns if col.lower()[-3:] != '_id'
]] ]]
memory_usage("2 - after query, before fetchall /////// _load_tables_db")
return tables return tables
...@@ -34,20 +37,22 @@ def _construct_star_schema_db(executer_instance): ...@@ -34,20 +37,22 @@ def _construct_star_schema_db(executer_instance):
:return: star schema DataFrame :return: star schema DataFrame
""" """
db = MyDB(db=executer_instance.cube) db = MyDB(db=executer_instance.cube)
memory_usage("1 - before executing query //// _construct_star_schema_db")
# load facts table # load facts table
fusion = psql.read_sql_query( with db.engine as connection:
'SELECT * FROM "{0}" '.format(executer_instance.facts), db.engine) fusion = psql.read_sql_query(
'SELECT * FROM "{0}" '.format(executer_instance.facts), connection)
inspector = inspect(db.engine)
inspector = inspect(connection)
for db_table_name in inspector.get_table_names():
try: for db_table_name in inspector.get_table_names():
fusion = fusion.merge( try:
psql.read_sql_query("SELECT * FROM {0}".format( fusion = fusion.merge(
db_table_name[0]), db.engine)) psql.read_sql_query("SELECT * FROM {0}".format(
except: db_table_name[0]), connection))
print('No common column') except:
pass print('No common column')
pass
memory_usage("2 - after query, before fetchall /////// _construct_star_schema_db")
return fusion return fusion
Memory summary:1 - before executing query //// _load_tables_db
VM: 3160.52Mb
Memory summary:2 - after query, before fetchall /////// _load_tables_db
VM: 3268.52Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 3268.52Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 3269.02Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 3269.02Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 3271.02Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 3271.02Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 3271.02Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 3271.02Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 3270.77Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 3270.77Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 3271.02Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 3271.02Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 3271.02Mb
Memory summary:1 - before executing query //// 1111 _construct_web_star_schema_config_file
VM: 3271.02Mb
Memory summary:2 - after query, before fetchall /////// 222222222222 _construct_star_schema_config_file
VM: 3271.02Mb
Memory summary:1 - before executing query //// 3333333333 _construct_web_star_schema_config_file
VM: 3271.02Mb
Memory summary:2 - after query, before fetchall /////// 44444444 _construct_star_schema_config_file
VM: 3271.02Mb
Memory summary:1 - before executing query //// 55555555 _construct_web_star_schema_config_file
VM: 3271.02Mb
Memory summary:2 - after query, before fetchall /////// 6666666666 _construct_star_schema_config_file
VM: 3271.02Mb
import os
from os.path import expanduser
from pympler import summary, muppy
import psutil
def get_virtual_memory_usage_kb():
    """
    Return the current process's virtual memory size in Kb, as a float.

    The value is the ``vms`` field reported by psutil for the running
    process, converted from bytes to Kb.
    """
    # memory_info_ex() is deprecated in psutil (since 4.0.0); memory_info()
    # exposes the same ``vms`` field, so use it instead.
    return float(psutil.Process().memory_info().vms) / 1024.0
def memory_usage(where):
    """
    Record a short memory report labelled with ``where``.

    Appends a "Memory summary:" header carrying the ``where`` label and the
    process's virtual memory size (in Mb) to ``bech_mem.txt`` in the user's
    home directory. The pympler object summary, limited to 2 rows, is
    emitted through ``summary.print_`` rather than written to that file.
    """
    report_path = os.path.join(expanduser('~'), 'bech_mem.txt')
    with open(report_path, mode='a+') as report:
        # Summarize every object muppy can currently see.
        object_rows = summary.summarize(muppy.get_objects())
        report.write("Memory summary:" + where + '\n\n')
        summary.print_(object_rows, limit=2)
        # The helper returns Kb; divide once more to report Mb.
        vm_mb = get_virtual_memory_usage_kb() / 1024.0
        report.write("VM: %.2fMb" % vm_mb + '\n\n')
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment