#!/usr/bin/env perl
use strict;
use warnings;
use Carp;
use DBI;
#use Module::Versions::Report;
sub stampedMsg {
#print $_[0], " ", `ps --no-headers -o time,size -p $$`;
print $_[0], " ", `date`;
}
# Default values for command line arguments.
my $maxItems = 1000000000;
#my $maxItems = 100;
my $dbFnameRoot = "tokenDB";
my ($tzr, $iDoc, $id_doc, $i, $ofst, $len, $tok);
my $id_lxm = 0; # used by declareMention
$tzr = createTokenDB($dbFnameRoot);
for ($iDoc = 1; $iDoc <= $maxItems; ++$iDoc) {
$id_doc = $iDoc;
$i = 1000;
while ($i) {
$ofst = 0;
$len = 0;
$tok = 'ab';
declareMention($tzr, "TERM", $tok, $id_doc, $ofst, $len);
--$i;
}
stampedMsg "Loaded Doc $iDoc," if $iDoc % 100 == 0;
}
closeTokenDB($tzr);
exit(0);
###############################################################################
sub createTokenDB {
my ($fNameRoot) = @_;
my $tzrState = {};
my $dbFname = $fNameRoot . ".SQLite";
my $dbh = createDB($dbFname);
dbCmd($dbh, "begin");
$tzrState->{"dbh"} = $dbh;
# nTransact < 0 signifies safe to issue "begin" statement.
$tzrState->{"nTransact"} = -1;
$tzrState->{"nTransactCommit"} = 10000;
my $type = 'TERM';
my $menTblName = "mentions_$type";
dbCmd($dbh, "create table $menTblName (id_mtn integer primary key,
id_lxm int, id_doc int, ofst int, len int)");
dbCmd($dbh, "commit");
return $tzrState;
}
sub closeTokenDB {
my ($tzrState) = @_;
my $dbh = $tzrState->{"dbh"};
if($tzrState->{"nTransact"} >= 0) {dbCmd($tzrState->{"dbh"}, "commit");}
$tzrState->{"dbh"}->disconnect;
}
sub declareMention {
my ($tzrState, $type, $nm, $id_doc, $ofst, $len) = @_;
my $dbh = $tzrState->{"dbh"};
if($tzrState->{"nTransact"} < 0) {
dbCmd0($dbh, "begin");
$tzrState->{"nTransact"} = 0;
}
if(++$tzrState->{"nTransact"} > $tzrState->{"nTransactCommit"}) {
dbCmd($dbh, "commit"); $tzrState->{"nTransact"} = -1;
}
$id_lxm++;
dbCmd0($dbh, "insert into mentions_". $type ."(id_doc,id_lxm,ofst,len) ".
"values($id_doc, $id_lxm, $ofst, $len)");
}
###############################################################################
# Open DB (deleting old file if any). Set pragmas for speed.
sub createDB {
my ($dbName) = @_;
if(-e($dbName)) {print("Deleting $dbName\n");}
if(-e($dbName)) {unlink $dbName;}
if(-e($dbName ."-journal")) {unlink($dbName ."-journal");}
print("Creating $dbName\n");
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbName", '', '');
confess "Failed to create to DB '$dbName'" unless $dbh;
$dbh->{RaiseError} = 1;
print("Created $dbName\n");
# Performance enhancements:
dbCmd($dbh, "pragma synchronous=OFF");
# This should make the process size about 150M. Default was 2K==> 3M.
#dbCmd($dbh, "pragma cache_size=100000");
return $dbh;
}
# Print SQL command and execute it.
sub dbCmd {
my ($dbh, $cmd) = @_;
print($cmd, "\n");
my $sth = $dbh->prepare($cmd);
$sth->execute;
return;
}
# Execute SQL command without printing it.
sub dbCmd0 {
my ($dbh, $cmd) = @_;
my $sth = $dbh->prepare($cmd);
$sth->execute;
return;
}
###############################################################################