Commit f819cac8 authored by stewart@willster.(none)'s avatar stewart@willster.(none)

Merge willster.(none):/home/stewart/Documents/MySQL/4.1/ndb

into  willster.(none):/home/stewart/Documents/MySQL/4.1/ndb_size_51
parents f8fdc3bc 628e9ccb
...@@ -57,7 +57,7 @@ if(@ARGV < 3 || $ARGV[0] eq '--usage' || $ARGV[0] eq '--help') ...@@ -57,7 +57,7 @@ if(@ARGV < 3 || $ARGV[0] eq '--usage' || $ARGV[0] eq '--help')
$template->param(dsn => $dsn); $template->param(dsn => $dsn);
} }
my @releases = ({rel=>'4.1'},{rel=>'5.0'},{rel=>'5.1'}); my @releases = ({rel=>'4.1'},{rel=>'5.0'},{rel=>'5.1'}); #,{rel=>'5.1-dd'});
$template->param(releases => \@releases); $template->param(releases => \@releases);
my $tables = $dbh->selectall_arrayref("show tables"); my $tables = $dbh->selectall_arrayref("show tables");
...@@ -81,25 +81,29 @@ sub align { ...@@ -81,25 +81,29 @@ sub align {
return @aligned; return @aligned;
} }
foreach(@{$tables}) sub do_table {
{ my $table= shift;
my $table= @{$_}[0]; my $info= shift;
my %indexes= %{$_[0]};
my @count= @{$_[1]};
my @columns; my @columns;
my $info= $dbh->selectall_hashref('describe `'.$table.'`',"Field");
my @count = $dbh->selectrow_array('select count(*) from `'.$table.'`');
my %columnsize; # used for index calculations my %columnsize; # used for index calculations
# We now work out the DataMemory usage # We now work out the DataMemory usage
# sizes for 4.1, 5.0, 5.1 # sizes for 4.1, 5.0, 5.1 and 5.1-dd
my @totalsize= (0,0,0); my @totalsize= (0,0,0,0);
@totalsize= @totalsize[0..$#releases]; # limit to releases we're outputting
my $nrvarsize= 0;
foreach(keys %$info) foreach(keys %$info)
{ {
my @realsize = (0,0,0); my @realsize = (0,0,0,0);
my @varsize = (0,0,0,0);
my $type; my $type;
my $size; my $size;
my $name= $_; my $name= $_;
my $is_varsize= 0;
if($$info{$_}{Type} =~ /^(.*?)\((\d+)\)/) if($$info{$_}{Type} =~ /^(.*?)\((\d+)\)/)
{ {
...@@ -112,54 +116,86 @@ foreach(@{$tables}) ...@@ -112,54 +116,86 @@ foreach(@{$tables})
} }
if($type =~ /tinyint/) if($type =~ /tinyint/)
{@realsize=(1,1,1)} {@realsize=(1,1,1,1)}
elsif($type =~ /smallint/) elsif($type =~ /smallint/)
{@realsize=(2,2,2)} {@realsize=(2,2,2,2)}
elsif($type =~ /mediumint/) elsif($type =~ /mediumint/)
{@realsize=(3,3,3)} {@realsize=(3,3,3,3)}
elsif($type =~ /bigint/) elsif($type =~ /bigint/)
{@realsize=(8,8,8)} {@realsize=(8,8,8,8)}
elsif($type =~ /int/) elsif($type =~ /int/)
{@realsize=(4,4,4)} {@realsize=(4,4,4,4)}
elsif($type =~ /float/) elsif($type =~ /float/)
{ {
if($size<=24) if($size<=24)
{@realsize=(4,4,4)} {@realsize=(4,4,4,4)}
else else
{@realsize=(8,8,8)} {@realsize=(8,8,8,8)}
} }
elsif($type =~ /double/ || $type =~ /real/) elsif($type =~ /double/ || $type =~ /real/)
{@realsize=(8,8,8)} {@realsize=(8,8,8,8)}
elsif($type =~ /bit/) elsif($type =~ /bit/)
{ {
my $a=($size+7)/8; my $a=($size+7)/8;
@realsize = ($a,$a,$a); @realsize = ($a,$a,$a,$a);
} }
elsif($type =~ /datetime/) elsif($type =~ /datetime/)
{@realsize=(8,8,8)} {@realsize=(8,8,8,8)}
elsif($type =~ /timestamp/) elsif($type =~ /timestamp/)
{@realsize=(4,4,4)} {@realsize=(4,4,4,4)}
elsif($type =~ /date/ || $type =~ /time/) elsif($type =~ /date/ || $type =~ /time/)
{@realsize=(3,3,3)} {@realsize=(3,3,3,3)}
elsif($type =~ /year/) elsif($type =~ /year/)
{@realsize=(1,1,1)} {@realsize=(1,1,1,1)}
elsif($type =~ /varchar/ || $type =~ /varbinary/) elsif($type =~ /varchar/ || $type =~ /varbinary/)
{ {
my $fixed= 1+$size; my $fixed=$size+ceil($size/256);
my @dynamic=$dbh->selectrow_array("select avg(length(`" my @dynamic=$dbh->selectrow_array("select avg(length(`"
.$name .$name
."`)) from `".$table.'`'); ."`)) from `".$table.'`');
$dynamic[0]=0 if !$dynamic[0]; $dynamic[0]=0 if !$dynamic[0];
@realsize= ($fixed,$fixed,ceil($dynamic[0])); $dynamic[0]+=ceil($dynamic[0]/256); # size bit
$nrvarsize++;
$is_varsize= 1;
$varsize[3]= ceil($dynamic[0]);
@realsize= ($fixed,$fixed,ceil($dynamic[0]),$fixed);
} }
elsif($type =~ /binary/ || $type =~ /char/) elsif($type =~ /binary/ || $type =~ /char/)
{@realsize=($size,$size,$size)} {@realsize=($size,$size,$size,$size)}
elsif($type =~ /text/ || $type =~ /blob/) elsif($type =~ /text/ || $type =~ /blob/)
{ {
@realsize=(256,256,1); @realsize=(8+256,8+256,8+256,8+256);
$NoOfTables[$_]{val} += 1 foreach 0..$#releases; # blob uses table
} # FIXME check if 5.1 is correct my $blobhunk= 2000;
$blobhunk= 8000 if $type=~ /longblob/;
$blobhunk= 4000 if $type=~ /mediumblob/;
my @blobsize=$dbh->selectrow_array("select SUM(CEILING(".
"length(`$name`)/$blobhunk))".
"from `".$table."`");
$blobsize[0]=0 if !defined($blobsize[0]);
#$NoOfTables[$_]{val} += 1 foreach 0..$#releases; # blob uses table
do_table($table."\$BLOB_$name",
{'PK'=>{Type=>'int'},
'DIST'=>{Type=>'int'},
'PART'=>{Type=>'int'},
'DATA'=>{Type=>"binary($blobhunk)"}
},
{'PRIMARY' => {
'unique' => 1,
'comment' => '',
'columns' => [
'PK',
'DIST',
'PART',
],
'type' => 'HASH'
}
},
\@blobsize);
}
@realsize= @realsize[0..$#releases];
@realsize= align(4,@realsize); @realsize= align(4,@realsize);
$totalsize[$_]+=$realsize[$_] foreach 0..$#totalsize; $totalsize[$_]+=$realsize[$_] foreach 0..$#totalsize;
...@@ -170,6 +206,7 @@ foreach(@{$tables}) ...@@ -170,6 +206,7 @@ foreach(@{$tables})
push @columns, { push @columns, {
name=>$name, name=>$name,
type=>$type, type=>$type,
is_varsize=>$is_varsize,
size=>$size, size=>$size,
key=>$$info{$_}{Key}, key=>$$info{$_}{Key},
datamemory=>\@realout, datamemory=>\@realout,
...@@ -183,24 +220,10 @@ foreach(@{$tables}) ...@@ -183,24 +220,10 @@ foreach(@{$tables})
# Firstly, we assemble some information about the indexes. # Firstly, we assemble some information about the indexes.
# We use SHOW INDEX instead of using INFORMATION_SCHEMA so # We use SHOW INDEX instead of using INFORMATION_SCHEMA so
# we can still connect to pre-5.0 mysqlds. # we can still connect to pre-5.0 mysqlds.
my %indexes;
{
my $sth= $dbh->prepare("show index from `".$table.'`');
$sth->execute;
while(my $i = $sth->fetchrow_hashref)
{
$indexes{${%$i}{Key_name}}= {
type=>${%$i}{Index_type},
unique=>!${%$i}{Non_unique},
comment=>${%$i}{Comment},
} if !defined($indexes{${%$i}{Key_name}});
$indexes{${%$i}{Key_name}}{columns}[${%$i}{Seq_in_index}-1]=
${%$i}{Column_name};
}
}
if(!defined($indexes{PRIMARY})) { if(!defined($indexes{PRIMARY})) {
my @usage= ({val=>8},{val=>8},{val=>8},{val=>8});
@usage= @usage[0..$#releases];
$indexes{PRIMARY}= { $indexes{PRIMARY}= {
type=>'BTREE', type=>'BTREE',
unique=>1, unique=>1,
...@@ -212,20 +235,22 @@ foreach(@{$tables}) ...@@ -212,20 +235,22 @@ foreach(@{$tables})
type=>'bigint', type=>'bigint',
size=>8, size=>8,
key=>'PRI', key=>'PRI',
datamemory=>[{val=>8},{val=>8},{val=>8}], datamemory=>\@usage,
}; };
$columnsize{'HIDDEN_NDB_PKEY'}= [8,8,8]; $columnsize{'HIDDEN_NDB_PKEY'}= [8,8,8];
} }
my @IndexDataMemory= ({val=>0},{val=>0},{val=>0}); my @IndexDataMemory= ({val=>0},{val=>0},{val=>0},{val=>0});
my @RowIndexMemory= ({val=>0},{val=>0},{val=>0}); my @RowIndexMemory= ({val=>0},{val=>0},{val=>0},{val=>0});
@IndexDataMemory= @IndexDataMemory[0..$#releases];
@RowIndexMemory= @RowIndexMemory[0..$#releases];
my @indexes; my @indexes;
foreach my $index (keys %indexes) { foreach my $index (keys %indexes) {
my $im41= 25; my $im41= 25;
$im41+=$columnsize{$_}[0] foreach @{$indexes{$index}{columns}}; $im41+=$columnsize{$_}[0] foreach @{$indexes{$index}{columns}};
my @im = ({val=>$im41},{val=>25},{val=>25}); my @im = ({val=>$im41},{val=>25},{val=>25}); #,{val=>25});
my @dm = ({val=>10},{val=>10},{val=>10}); my @dm = ({val=>10},{val=>10},{val=>10}); #,{val=>10});
push @indexes, { push @indexes, {
name=>$index, name=>$index,
type=>$indexes{$index}{type}, type=>$indexes{$index}{type},
...@@ -233,13 +258,22 @@ foreach(@{$tables}) ...@@ -233,13 +258,22 @@ foreach(@{$tables})
indexmemory=>\@im, indexmemory=>\@im,
datamemory=>\@dm, datamemory=>\@dm,
}; };
$IndexDataMemory[$_]{val}+=$dm[$_]{val} foreach 0..2; $IndexDataMemory[$_]{val}+=$dm[$_]{val} foreach 0..$#releases;
$RowIndexMemory[$_]{val}+=$im[$_]{val} foreach 0..2; $RowIndexMemory[$_]{val}+=$im[$_]{val} foreach 0..$#releases;
} }
# total size + 16 bytes overhead # total size + 16 bytes overhead
my @TotalDataMemory; my @TotalDataMemory;
$TotalDataMemory[$_]{val}=$IndexDataMemory[$_]{val}+$totalsize[$_]+16 foreach 0..2; my @RowOverhead = ({val=>16},{val=>16},{val=>16}); #,{val=>24});
# 5.1 has ptr to varsize page, and per-varsize overhead
my @nrvarsize_mem= ({val=>0},{val=>0},
{val=>8}); #,{val=>0});
{
my @a= align(4,$nrvarsize*2);
$nrvarsize_mem[2]{val}+=$a[0]+$nrvarsize*4;
}
$TotalDataMemory[$_]{val}=$IndexDataMemory[$_]{val}+$totalsize[$_]+$RowOverhead[$_]{val}+$nrvarsize_mem[$_]{val} foreach 0..$#releases;
my @RowDataMemory; my @RowDataMemory;
push @RowDataMemory,{val=>$_} foreach @totalsize; push @RowDataMemory,{val=>$_} foreach @totalsize;
...@@ -260,12 +294,18 @@ foreach(@{$tables}) ...@@ -260,12 +294,18 @@ foreach(@{$tables})
my @counts; my @counts;
$counts[$_]{val}= $count foreach 0..$#releases; $counts[$_]{val}= $count foreach 0..$#releases;
my @nrvarsize_rel= ({val=>0},{val=>0},
{val=>$nrvarsize}); #,{val=>0});
push @table_size, { push @table_size, {
table=>$table, table=>$table,
indexes=>\@indexes, indexes=>\@indexes,
columns=>\@columns, columns=>\@columns,
count=>\@counts, count=>\@counts,
RowOverhead=>\@RowOverhead,
RowDataMemory=>\@RowDataMemory, RowDataMemory=>\@RowDataMemory,
nrvarsize=>\@nrvarsize_rel,
nrvarsize_mem=>\@nrvarsize_mem,
releases=>\@releases, releases=>\@releases,
IndexDataMemory=>\@IndexDataMemory, IndexDataMemory=>\@IndexDataMemory,
TotalDataMemory=>\@TotalDataMemory, TotalDataMemory=>\@TotalDataMemory,
...@@ -283,6 +323,31 @@ foreach(@{$tables}) ...@@ -283,6 +323,31 @@ foreach(@{$tables})
$NoOfIndexes[$_]{val} += @indexes foreach 0..$#releases; $NoOfIndexes[$_]{val} += @indexes foreach 0..$#releases;
} }
# Driver loop: for every table returned by "show tables", gather the
# metadata do_table() needs and hand it over:
#   - the column descriptions (DESCRIBE output, keyed by column name),
#   - the row count (single-element list from SELECT COUNT(*)),
#   - the index definitions, keyed by index name.
# SHOW INDEX is used rather than INFORMATION_SCHEMA so that the script can
# still connect to pre-5.0 servers.
foreach(@{$tables})
{
    # Each row of $tables is an array ref; its first element is the table name.
    my $table= $_->[0];
    my $info= $dbh->selectall_hashref('describe `'.$table.'`',"Field");
    my @count = $dbh->selectrow_array('select count(*) from `'.$table.'`');
    my %indexes;
    {
        my $sth= $dbh->prepare("show index from `".$table.'`');
        $sth->execute;
        while(my $i = $sth->fetchrow_hashref)
        {
            # $i is a hash ref: dereference it with ->.  The former
            # ${%$i}{...} spelling used a hash as a reference, which newer
            # perls reject.
            my $key_name= $i->{Key_name};

            # SHOW INDEX yields one row per indexed column; create the
            # index entry the first time this index name is seen.
            $indexes{$key_name}= {
                type=>$i->{Index_type},
                unique=>!$i->{Non_unique},
                comment=>$i->{Comment},
            } if !defined($indexes{$key_name});

            # Seq_in_index is 1-based; store the column at its 0-based slot.
            $indexes{$key_name}{columns}[$i->{Seq_in_index}-1]=
                $i->{Column_name};
        }
    }
    do_table($table, $info, \%indexes, \@count);
}
my @NoOfTriggers; my @NoOfTriggers;
# for unique hash indexes # for unique hash indexes
$NoOfTriggers[$_]{val} += $NoOfIndexes[$_]{val}*3 foreach 0..$#releases; $NoOfTriggers[$_]{val} += $NoOfIndexes[$_]{val}*3 foreach 0..$#releases;
......
...@@ -15,6 +15,8 @@ td,th { border: 1px solid black } ...@@ -15,6 +15,8 @@ td,th { border: 1px solid black }
<p>This information should be valid for MySQL 4.1 and 5.0. Since 5.1 is not a final release yet, the numbers should be used as a guide only.</p> <p>This information should be valid for MySQL 4.1 and 5.0. Since 5.1 is not a final release yet, the numbers should be used as a guide only.</p>
<p>5.1-dd is for tables stored on disk. The ndb_size.pl estimates are <b>experimental</b> and should not be trusted. Notably we don't take into account indexed columns being in DataMemory versus non-indexed on disk.</p>
<h2>Parameter Settings</h2> <h2>Parameter Settings</h2>
<p><b>NOTE</b> the configuration parameters below do not take into account system tables and other requirements.</p> <p><b>NOTE</b> the configuration parameters below do not take into account system tables and other requirements.</p>
<table> <table>
...@@ -69,6 +71,7 @@ td,th { border: 1px solid black } ...@@ -69,6 +71,7 @@ td,th { border: 1px solid black }
<tr> <tr>
<th>Column</th> <th>Column</th>
<th>Type</th> <th>Type</th>
<th>VARSIZE</th>
<th>Size</th> <th>Size</th>
<th>Key</th> <th>Key</th>
<TMPL_LOOP NAME=releases> <TMPL_LOOP NAME=releases>
...@@ -79,6 +82,7 @@ td,th { border: 1px solid black } ...@@ -79,6 +82,7 @@ td,th { border: 1px solid black }
<tr> <tr>
<td><TMPL_VAR NAME=name></td> <td><TMPL_VAR NAME=name></td>
<td><TMPL_VAR NAME=type></td> <td><TMPL_VAR NAME=type></td>
<td><TMPL_IF NAME=is_varsize>YES<TMPL_ELSE>&nbsp;</TMPL_IF></td>
<td><TMPL_VAR NAME=size></td> <td><TMPL_VAR NAME=size></td>
<td><TMPL_VAR NAME=key></td> <td><TMPL_VAR NAME=key></td>
<TMPL_LOOP NAME=datamemory> <TMPL_LOOP NAME=datamemory>
...@@ -128,10 +132,22 @@ td,th { border: 1px solid black } ...@@ -128,10 +132,22 @@ td,th { border: 1px solid black }
<th><TMPL_VAR NAME=rel></th> <th><TMPL_VAR NAME=rel></th>
</TMPL_LOOP> </TMPL_LOOP>
</tr> </tr>
<tr>
<th>Nr Varsized Attributes</th>
<TMPL_LOOP NAME=nrvarsize>
<td><TMPL_VAR NAME=val></td>
</TMPL_LOOP>
</tr>
<tr> <tr>
<th>Row Overhead</th> <th>Row Overhead</th>
<TMPL_LOOP NAME=releases> <TMPL_LOOP NAME=RowOverhead>
<td>16</td> <td><TMPL_VAR NAME=val></td>
</TMPL_LOOP>
</tr>
<tr>
<th>Varsized Overhead</th>
<TMPL_LOOP NAME=nrvarsize_mem>
<td><TMPL_VAR NAME=val></td>
</TMPL_LOOP> </TMPL_LOOP>
</tr> </tr>
<tr> <tr>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment