#!/usr/bin/perl

# Simple perl script to take portable SQL
# and generate database specific DDL

use strict;

sub usage {
  return <<EOF;
$0: Usage:

  $0 DBNAME [[DESTINATION] SOURCEFILE]

where DBNAME is DB2, postgres, postgres_preSERIAL, sqlite, oracle or mysql
EOF
}

my $dbtarget = shift || die usage();
   $dbtarget =~ m/^(db2|oracle|sqlite|postgres|postgres_preSERIAL|mysql)$/ || die usage();
   $dbtarget = lc($dbtarget);

my $inputfile = shift;

my $outputfile = shift;
if ($outputfile) {
  open(OUTPUT, ">$outputfile") || die "Cannot open '$outputfile': $!\n";
  select(OUTPUT);
}

# put inputfile back in ARGV so we can just use <>
unshift(@ARGV, $inputfile);


sub to_sqlite {
  s/BOOLEAN/SMALLINT/g;
  s/TINYINT/SMALLINT/g;
  s/MEDIUMTEXT/TEXT/g;
  s/SERIAL//g;
}

sub to_oracle {
  s/TEXT/CLOB/g;
  s/VARCHAR/VARCHAR2/g;
  s/TINYINT/NUMBER(3,0)/g;
  s/SMALLINT/NUMBER(6,0)/g;
  s/INTEGER/NUMBER(20,0)/g;
  s/TIMESTAMP/DATE/g;
  s/SERIAL//g;
  if (/^CREATE TABLE (\w+) \(/) {
    $_ .= <<SEQ;

CREATE SEQUENCE ${1}_seq;
SEQ
  }
  s/(\s)(?:TEXT|VARCHAR)(\s*)\((\d+)\)/
       my($space1, $space2, $textlen) = ($1, $2, $3);

       my $type;
       if ($textlen < 4001) {
            $type = 'VARCHAR2';
            $space1 . $type . $space2 . '(' . $textlen . ')';
       } else {
            $type = 'CLOB';
            $space1 . $type . $space2;
       }
  /ge;
  s/(\s)BLOB(\s*)\((\d+)\)/
       my($space1, $space2, $bloblen) = ($1, $2, $3);
       $space1 . 'BLOB' . $space2;
  /ge;    
  # default TEXT/BLOB limit is 65535
  s/(\s)TEXT/$1CLOB/g;
  s/(\s)BLOB/$1BLOB/g;
}

#sub to_postgres {
#  # datatypes
#  s/BLOB/BYTEA/g;
#  s/TIMESTAMP/TIMESTAMP WITH TIME ZONE/g;
#  s/INTEGER(.*)SERIAL/SERIAL $1/g;
#  s/TINYINT/SMALLINT/g;
#}

sub to_postgres {
  # datatypes
  s/BLOB/BYTEA/g;
  s/TINYINT/SMALLINT/g;
  s/BOOLEAN/SMALLINT/g;
  s/MEDIUMINT/INTEGER/g;
  s/MEDIUMTEXT/TEXT/g;
  s/TIMESTAMP/TIMESTAMP WITH TIME ZONE/g;
  if (/^CREATE TABLE (\w+) \(/) {
      $_ .= <<SEQ;

CREATE SEQUENCE ${1}_id;
SEQ
  }
  s/SERIAL//;
}

# Convert to DB2 DDL format

sub to_db2 {
  s/TEXT/CLOB(128k)/g;

  # handle autoincrement
  s/SERIAL(.*),/$1 GENERATED BY DEFAULT AS IDENTITY,/;

  # handle 18 char limitation.. grr
  if (m/CREATE INDEX ([a-z0-9_]+)/) {
      my $index_name = $1;
      if (length($index_name) > 17) {
	  # get rid of vowels
	  $index_name =~ s/[aeiou]//g;
	  if (length($index_name) > 17) {
	      # umm, truncate it?
	      $index_name = substr($index_name, 0,17);
	  }
          $index_name =~ s/_$//;

	  s/CREATE INDEX ([a-z0-9_]+)/CREATE INDEX $index_name/;
      }
  }
  # handle 30 character column name limit, barf...
  if (m/^\s+([a-z_]+)\s+(VARCHAR|TEXT|CHAR|INTEGER)/) {
     if (length($1) > 30) {
        # At this time we hack this to reduce _display_ to _disp_
        # new sql code with long column names will have to be dealt with
        unless (s/_display_/_disp_/) {
	  die "Found long column name $1 that we can't deal with\n";
        }
     }
  }

}

# Convert to MySQL DDL format

sub to_mysql {
  s/(\s)(?:TEXT|VARCHAR)(\s*)\((\d+)\)/
       my($space1, $space2, $textlen) = ($1, $2, $3);

       my $type;
       if ($textlen < 256) {
            $type = 'VARCHAR';
       } elsif ($textlen < 65536) {
            $type = 'TEXT';
       } elsif ($textlen < 16777216) {
            $type = 'MEDIUMTEXT';
       } else {
            $type = 'LONGTEXT';
       }
       $space1 . $type . $space2 . '(' . $textlen . ')';
  /ge;
  s/(\s)BLOB(\s*)\((\d+)\)/
       my($space1, $space2, $bloblen) = ($1, $2, $3);
       my $type;
       if ($bloblen < 256) {
            $type = 'TINYBLOB';
       } elsif ($bloblen < 65536) {
            $type = 'MEDIUMBLOB';
       } elsif ($bloblen < 16777216) {
            $type = 'LONGBLOB';
       }
       $space1 . $type . $space2 . '(' . $bloblen . ')';
  /ge;

  # default TEXT/BLOB limit is 65535
  #s/(\s)TEXT/$1MEDIUMTEXT/g;
  #s/(\s)BLOB/$1MEDIUMBLOB/g;

  # TIMESTAMP should become DATETIME, because we don't want it to
  # auto-update (the drivers will handle that).
  s/_created_on\s+TIMESTAMP/_created_on DATETIME/g; # except for the modified_on fields
  s/blog_children_modified_on\s+TIMESTAMP/blog_children_modified_on DATETIME/g; # except for the modified_on fields

  # handle autoincrement
  s/SERIAL/AUTO_INCREMENT/;
}

my $conv_func = 'to_' . $dbtarget;

local $/ = ';';

while (<>) {
  s/^\-.*$//mg;  ## Strip comments on their own lines.
  s/^\s*//;
  s/\s*$//;
  # can't hurt, saves keystrokes
  s/PRIMARY KEY/PRIMARY KEY NOT NULL/;
  no strict 'refs';
  &{$conv_func};
  use strict 'refs';
  print $_, "\n";
}

# post stage..  Write out sequences if needed.
