[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GT] SVN Commit r589 - in trunk/GT: . DB
Author: thomas
Date: 2008-03-23 05:39:32 +0100 (Sun, 23 Mar 2008)
New Revision: 589
Modified:
trunk/GT/DB/HTTP.pm
trunk/GT/DB/Text.pm
trunk/GT/Prices.pm
Log:
Restore faster date formats. Reuse between DB::Text and DB::HTTP.
Modified: trunk/GT/DB/HTTP.pm
===================================================================
--- trunk/GT/DB/HTTP.pm 2008-03-22 06:02:29 UTC (rev 588)
+++ trunk/GT/DB/HTTP.pm 2008-03-23 04:39:32 UTC (rev 589)
@@ -26,10 +26,17 @@
=head2 Configuration
-You must set some configuration items in ~/.gt/options, especially for authentification purpose.
+Most configuration items have default values, to alter these defaults
+you must indicate the configuration item and its value in your
+$HOME/.gt/options file, especially for authentication purposes.
=over
+=item DB::module HTTP
+
+Informs gt you are using the HTTP.pm module. This
+configuration item is always required in your $HOME/.gt/options file.
+
=item DB::HTTP::url : The URL that will be requested to download
=item DB::HTTP::location : The location of the server (www.geniustrader.org)
@@ -40,6 +47,58 @@
=item DB::HTTP::password : The password (ie : anonymous)
+=item DB::HTTP::marker string
+
+Delimits fields in each row of the data file.
+The marker defaults to the tab character '\t'.
+
+=item DB::HTTP::header_lines number
+
+The number of header lines in your data file
+that are to be skipped during processing. Lines with either the
+comment symbol '#' or the less than symbol '<' as the first character
+do not need to be included in this value. The header_lines default value is 0.
+
+=item DB::HTTP::format 0|1|2|3 (default is 3)
+The format of the date/time string. Valid values are:
+0 - yyyy-mm-dd hh:nn:ss (the time string is optional)
+1 - US Format (month before day, any format understood by Date::Calc)
+2 - European Format (day before month, any format understood by Date::Calc)
+3 - Any format understood by Date::Manip
+
+=item DB::HTTP::fields::datetime number
+
+Column index where to find the period datetime
+field. Indexes are 0 based. For the particular case of datetime, can contain
+multiple indexes, useful when date and time are separate columns in the data
+file. The date time format is anything that can be understood by Date::Manip.
+A typical example would be YYYY-MM-DD HH:NN:SS. The default datetime index is 5.
+
+=item DB::HTTP::fields::open number
+
+Column index where to find the period open field.
+Indexes are 0 based. The default open index is 0.
+
+=item DB::HTTP::fields::low number
+
+Column index where to find the period low field.
+Indexes are 0 based. The default low index is 2.
+
+=item DB::HTTP::fields::high number
+
+Column index where to find the period high field.
+Indexes are 0 based. The default high index is 1.
+
+=item DB::HTTP::fields::close number
+
+Column index where to find the period close field.
+Indexes are 0 based. The default close index is 3.
+
+=item DB::HTTP::fields::volume number
+
+Column index where to find the period volume field.
+Indexes are 0 based. The default volume index is 4.
+
=back
You can set the DB::HTTP::directory configuration item to tell where
@@ -61,8 +120,28 @@
GT::Conf::default("DB::HTTP::directory",
GT::Conf::_get_home_path() . "/.gt/http-db-cache");
-
+ GT::Conf::default('DB::HTTP::header_lines', '0');
+ GT::Conf::default('DB::HTTP::marker', "\t");
+ GT::Conf::default('DB::HTTP::file_extension', '.txt');
+ GT::Conf::default('DB::HTTP::format', '3');
+ GT::Conf::default('DB::HTTP::fields::datetime', '5');
+ GT::Conf::default('DB::HTTP::fields::open', '0');
+ GT::Conf::default('DB::HTTP::fields::low', '2');
+ GT::Conf::default('DB::HTTP::fields::high', '1');
+ GT::Conf::default('DB::HTTP::fields::close', '3');
+ GT::Conf::default('DB::HTTP::fields::volume', '4');
+
my $self = { "directory" => GT::Conf::get("DB::HTTP::directory"),
+ "header_lines" => GT::Conf::get('DB::HTTP::header_lines'),
+ "mark" => GT::Conf::get('DB::HTTP::marker'),
+ "date_format" => GT::Conf::get('DB::HTTP::format'),
+ "extension" => GT::Conf::get('DB::HTTP::file_extension'),
+ "datetime" => GT::Conf::get('DB::HTTP::fields::datetime'),
+ "open" => GT::Conf::get('DB::HTTP::fields::open'),
+ "low" => GT::Conf::get('DB::HTTP::fields::low'),
+ "high" => GT::Conf::get('DB::HTTP::fields::high'),
+ "close" => GT::Conf::get('DB::HTTP::fields::close'),
+ "volume" => GT::Conf::get('DB::HTTP::fields::volume'),
"url" => GT::Conf::get("DB::HTTP::url"),
"location" => GT::Conf::get("DB::HTTP::location"),
"zone" => GT::Conf::get("DB::HTTP::zone"),
@@ -93,37 +172,6 @@
}
-=item C<< $db->set_options($mark, $date_format, %fields) >>
-
-Set up all available options required to load text files.
-
-By default :
-
- - Mark is a tabulation ("\t")
- - Date Format
- 0 : GeniusTrader Date Format
- 1 : US sort of Date Format
- 2 : EU sort of Date Format
- - Fields Map
- %fields = ('open' => 0, 'high' => 1, 'low' => 2, 'close' => 3,
- %'volume' => 4, 'date' => 5);
-
-=cut
-sub set_options {
- my ($self, $mark, $date_format, %fields) = @_;
-
- if ($mark) { $self->{'mark'} = $mark; }
- if ($date_format) {$self->{'date_format'} = $date_format; }
- if (%fields) {
- $self->{'open'} = $fields{'open'};
- $self->{'high'} = $fields{'high'};
- $self->{'low'} = $fields{'low'};
- $self->{'close'} = $fields{'close'};
- $self->{'volume'} = $fields{'volume'};
- $self->{'date'} = $fields{'date'};
- }
-}
-
=item C<< $db->get_prices($code, $timeframe) >>
Returns a GT::Prices object containing all known prices for the symbol $code.
@@ -131,7 +179,7 @@
=cut
sub get_prices {
my ($self, $code, $timeframe) = @_;
- $timeframe = $DAY unless ($timeframe);
+ $timeframe = $DAY unless ($timeframe);
die "Intraday support not implemented in DB::HTTP" if ($timeframe < $DAY);
return GT::Prices->new() if ($timeframe > $DAY);
@@ -139,23 +187,24 @@
$prices->set_timeframe($timeframe);
if (!$self->{'mark'}) { $self->{'mark'} = "\t"; }
- if (!$self->{'date_format'}) { $self->{'date_format'} = 0; }
+ if (!$self->{'date_format'}) { $self->{'date_format'} = 3; }
+ if (!$self->{'header_lines'}) { $self->{'header_lines'} = 0; }
if (!$self->{'open'}) { $self->{'open'} = 0; }
if (!$self->{'high'}) { $self->{'high'} = 1; }
if (!$self->{'low'}) { $self->{'low'} = 2; }
if (!$self->{'close'}) { $self->{'close'} = 3; }
if (!$self->{'volume'}) { $self->{'volume'} = 4; }
- if (!$self->{'date'}) { $self->{'date'} = 5; }
+ if (!$self->{'date'}) { $self->{'datetime'} = 5; }
my %fields = ('open' => $self->{'open'}, 'high' => $self->{'high'},
'low' => $self->{'low'}, 'close' => $self->{'close'},
- 'volume' => $self->{'volume'}, 'date' => $self->{'date'});
+ 'volume' => $self->{'volume'}, 'date' => $self->{'datetime'});
$self->{'fields'} = \%fields;
my $file = $self->download_prices($code);
$prices->loadtxt($file, $self->{'mark'}, $self->{'date_format'},
- %fields);
+ $self->{'header_lines'}, %fields);
return $prices;
}
Modified: trunk/GT/DB/Text.pm
===================================================================
--- trunk/GT/DB/Text.pm 2008-03-22 06:02:29 UTC (rev 588)
+++ trunk/GT/DB/Text.pm 2008-03-23 04:39:32 UTC (rev 589)
@@ -14,7 +14,6 @@
use GT::Prices;
use GT::Conf;
use GT::DateTime;
-use Date::Manip;
=head1 DB::Text access module
@@ -29,21 +28,33 @@
you must indicate the configuration item and its value in your
$HOME/.gt/options file.
-DB::module Text -- informs gt you are using the Text.pm module. This
+=over
+
+=item DB::module Text
+
+Informs gt you are using the Text.pm module. This
configuration item is always required in your $HOME/.gt/options file.
-DB::text::directory path -- where files are stored. This
+=item DB::text::directory path
+
+Where files are stored. This
configuration item is always required in your $HOME/.gt/options file.
-DB::text::marker string -- which delimits fields in each row of the data file.
+=item DB::text::marker string
+
+Delimits fields in each row of the data file.
The marker defaults to the tab character '\t'.
-DB::Text::header_lines number -- The number of header lines in your data file
+=item DB::text::header_lines number
+
+The number of header lines in your data file
that are to be skipped during processing. Lines with the either the
comment symbol '#' or the less than symbol '<' as the first character
do not need to be included in this value.. The header_lines default value is 0.
-DB::text::file_extension string -- to be appended to the code file name when
+=item DB::text::file_extension string
+
+To be appended to the code file name when
searching the data file. For instance, if the data file is called EURUSD.csv
this variable would have the value '.csv' (without the quotes).
@@ -52,27 +63,46 @@
if you have data in different timeframes, for instance, EURUSD_hour.csv and
EURUSD_day.csv, use the following value for this directive:
-DB::text::file_extension _$timeframe.csv
+=item DB::text::file_extension _$timeframe.csv
-DB::text::fields::datetime number -- Column index where to find the period datetime
+=item DB::text::format 0|1|2|3 (default is 3)
+The format of the date/time string. Valid values are:
+0 - yyyy-mm-dd hh:nn:ss (the time string is optional)
+1 - US Format (month before day, any format understood by Date::Calc)
+2 - European Format (day before month, any format understood by Date::Calc)
+3 - Any format understood by Date::Manip
+
+=item DB::text::fields::datetime number
+
+Column index where to find the period datetime
field. Indexes are 0 based. For the particular case of datetime, can contain
multiple indexes, useful when date and time are separate columns in the data
file. The date time format is anything that can be understood by Date::Manip.
A typical example would be YYYY-MM-DD HH:NN:SS. The default datetime index is 5.
-DB::text::fields::open number -- Column index where to find the period open field.
+=item DB::text::fields::open number
+
+Column index where to find the period open field.
Indexes are 0 based. The default open index is 0.
-DB::text::fields::low number -- Column index where to find the period low field.
+=item DB::text::fields::low number
+
+Column index where to find the period low field.
Indexes are 0 based. The default low index is 2.
-DB::text::fields::high number -- Column index where to find the period high field.
+=item DB::text::fields::high number
+
+Column index where to find the period high field.
Indexes are 0 based. The default high index is 1.
-DB::text::fields::close number -- Column index where to find the period close field.
+=item DB::text::fields::close number
+
+Column index where to find the period close field.
Indexes are 0 based. The default close index is 3.
-DB::text::fields::volume number -- Column index where to find the period volume field.
+=item DB::text::fields::volume number
+
+Column index where to find the period volume field.
Indexes are 0 based. The default volume index is 4.
@@ -91,6 +121,7 @@
GT::Conf::default('DB::Text::header_lines', '0');
GT::Conf::default('DB::Text::marker', "\t");
GT::Conf::default('DB::Text::file_extension', '.txt');
+ GT::Conf::default('DB::Text::format', '3');
GT::Conf::default('DB::Text::fields::datetime', '5');
GT::Conf::default('DB::Text::fields::open', '0');
GT::Conf::default('DB::Text::fields::low', '2');
@@ -100,6 +131,7 @@
$self->{'header_lines'} = GT::Conf::get('DB::Text::header_lines');
$self->{'mark'} = GT::Conf::get('DB::Text::marker');
+ $self->{'date_format'} = GT::Conf::get('DB::Text::format');
$self->{'extension'} = GT::Conf::get('DB::Text::file_extension');
$self->{'datetime'} = GT::Conf::get('DB::Text::fields::datetime');
$self->{'open'} = GT::Conf::get('DB::Text::fields::open');
@@ -144,65 +176,34 @@
return GT::Prices->new() if ($timeframe > $DAY);
- my @datetime_fields = split(',',$self->{'datetime'});
- my $datetime_fields_count = scalar(@datetime_fields);
-
my $prices = GT::Prices->new;
$prices->set_timeframe($timeframe);
+ if (!$self->{'mark'}) { $self->{'mark'} = "\t"; }
+ if (!$self->{'date_format'}) { $self->{'date_format'} = 3; }
+ if (!$self->{'header_lines'}) { $self->{'header_lines'} = 0; }
+ if (!$self->{'open'}) { $self->{'open'} = 0; }
+ if (!$self->{'high'}) { $self->{'high'} = 1; }
+ if (!$self->{'low'}) { $self->{'low'} = 2; }
+ if (!$self->{'close'}) { $self->{'close'} = 3; }
+ if (!$self->{'volume'}) { $self->{'volume'} = 4; }
+ if (!$self->{'datetime'}) { $self->{'datetime'} = 5; }
+
+ my %fields = ('open' => $self->{'open'}, 'high' => $self->{'high'},
+ 'low' => $self->{'low'}, 'close' => $self->{'close'},
+ 'volume' => $self->{'volume'}, 'date' => $self->{'datetime'});
+ $self->{'fields'} = \%fields;
+
my $extension = $self->{'extension'};
my $tfname = GT::DateTime::name_of_timeframe($timeframe);
$extension =~ s/\$timeframe/$tfname/g;
my $file = $self->{'directory'} . "/$code" . $extension;
- #open(FILE, "<$file") || (warn "Can't open $file: $!\n" and return GT::Prices->new());
- open(FILE, "<", "$file") || (warn "Can't open $file: $!\n" and return GT::Prices->new());
+ $prices->loadtxt($file, $self->{'mark'}, $self->{'date_format'},
+ $self->{'header_lines'}, %fields);
+ return $prices;
- my ($open, $high, $low, $close, $volume, $date);
- my ($year, $month, $day);
-
- my $lines_to_skip = $self->{'header_lines'};
-
- #TODO
- #Date::Manip requires this to be defined
- #there probably is a better way of doing this
- #rather than defining it here, but it works
- #for now
- $ENV{'TZ'} = 'GMT' unless(defined($ENV{'TZ'}));
-
- # Process each line in $file...
- while (defined($_=<FILE>))
- {
- # Skip user specified number of file header lines
- if ( $lines_to_skip > 0 ) {
- $lines_to_skip--;
- next;
- }
-
- next if (/^[#<]/); #Skip comments and METASTOCK ascii file header
- # Get and split the line with $mark
- chomp;
- my @line = split($self->{'mark'});
-
- # Get and swap all necessary fields according to the fields map
- $open = $line[$self->{'open'}];
- $high = $line[$self->{'high'}];
- $low = $line[$self->{'low'}];
- $close = $line[$self->{'close'}];
- $volume = $line[$self->{'volume'}] or $volume = 0; #some datasets don't include volume
- my $datetime=$line[$datetime_fields[0]];
- for (my $i=1; $i<$datetime_fields_count;$i++) {
- $datetime .= ' '.$line[$datetime_fields[$i]];
- }
- $date = &UnixDate($datetime, '%Y-%m-%d %H:%M:%S');
-
- # Add all data within the GT::Prices object
- $prices->add_prices([ $open, $high, $low, $close, $volume, $date ]);
- }
- close FILE;
-
- return $prices;
}
=pod
Modified: trunk/GT/Prices.pm
===================================================================
--- trunk/GT/Prices.pm 2008-03-22 06:02:29 UTC (rev 588)
+++ trunk/GT/Prices.pm 2008-03-23 04:39:32 UTC (rev 589)
@@ -11,6 +11,7 @@
#ALL# use Log::Log4perl qw(:easy);
use GT::DateTime;
use GT::Serializable;
+use Date::Manip;
require Exporter;
@ISA = qw(Exporter GT::Serializable);
@@ -285,12 +286,14 @@
=cut
sub loadtxt {
- my ($self, $file, $mark, $date_format, %fields) = @_;
+ my ($self, $file, $mark, $date_format, $skip, %fields) = @_;
open(FILE, '<', "$file") || die "Can't open $file: $!\n";
+# unless(open(FILE, '<', "$file")} || (warn "Can't open $file: $!\n" and return;
+
$self->{'prices'} = [];
my ($open, $high, $low, $close, $volume, $date);
- my ($year, $month, $day);
+ my ($year, $month, $day, $tm);
# Initialize all options with the default settings
# Set up $mark as a tabulation
@@ -307,9 +310,20 @@
# Process each line in $file...
while (defined($_=<FILE>))
{
+ # Skip user specified number of file header lines
+ if ( $skip > 0 ) {
+ $skip--;
+ next;
+ }
+
# ... only if it's a line without strings (ie: everything but head line)
- if (!/\G[A-Za-z]/gc) {
+ next if (/^[#<]/); #Skip comments and METASTOCK ascii file header
+ #next if (/\G[A-Za-z]/gc); #Skip all lines containing text strings
+ #NOTE: The first does not skip typical headers; the second does
+ # not allow textual dates.
+ if (!/date/ig) {
+
# Get and split the line with $mark
chomp;
my @line = split("$mark");
@@ -319,14 +333,20 @@
$high = $line[$fields{'high'}];
$low = $line[$fields{'low'}];
$close = $line[$fields{'close'}];
- $volume = $line[$fields{'volume'}];
- $date = $line[$fields{'date'}];
+ $volume = $line[$fields{'volume'}] || 0;
+ my @datetime_fields = split(',',$fields{'date'});
+ my $datetime_fields_count = scalar(@datetime_fields);
+ my $date=$line[$datetime_fields[0]];
+ for (my $i=1; $i<$datetime_fields_count;$i++) {
+ $date .= ' '.$line[$datetime_fields[$i]];
+ }
# Decode the date from the text file to something useable
# The hh:nn:ss part is optional
# $date_format eq 0 : GeniusTrader Date Format (yyyy-mm-dd hh:nn:ss)
- # $date_format eq 1 : US sort of Date Format (mm/dd/yyyy)
- # $date_format eq 2 : EU sort of Date Format (dd/mm/yyyy)
+ # $date_format eq 1 : US sort of Date Format (month before day)
+ # $date_format eq 2 : EU sort of Date Format (day before month)
+ # $date_format eq 3 : Any format understood by Date::Manip
if ($date_format != 0) {
@@ -336,13 +356,35 @@
if ($date_format eq 2) {
($year, $month, $day) = Decode_Date_EU($date);
}
+ if ($date_format eq 3) {
+ #Date::Manip requires this to be defined
+ #there probably is a better way of doing this
+ #rather than defining it here, but it works
+ #for now
+ $ENV{'TZ'} = 'GMT' unless(defined($ENV{'TZ'}));
+ my $udate = &UnixDate($date, '%Y-%m-%d %H:%M:%S');
+ unless (defined $udate) {
+ warn "Incorrect date for format $date_format: $date.\n";
+ next;
+ }
+ ( $year, $month, $day, $tm ) = split /[- ]/, $udate;
+ }
+ unless (defined $year) {
+ warn "Incorrect date for format $date_format: $date.\n";
+ next;
+ }
my ($today_year, $today_month, $today_day) = Today();
if ($year > $today_year) {
$year -= 100;
}
- $month = "0" . $month if $month < 10;
- $day = "0" . $day if $day < 10;
- $date = $year . "-" . $month . "-" .$day;
+ # Time::Local only works for dates within 50 years
+ next if $year <= $today_year - 50;
+ unless ($date_format eq 3) {
+ $month = '0' . $month if $month < 10;
+ $day = '0' . $day if $day < 10;
+ }
+ $date = $year . '-' . $month . '-' .$day;
+ $date .= " $tm" if $tm;
}
# Add all data within the GT::Prices object