[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GT] SVN Commit r589 - in trunk/GT: . DB



Author: thomas
Date: 2008-03-23 05:39:32 +0100 (Sun, 23 Mar 2008)
New Revision: 589

Modified:
   trunk/GT/DB/HTTP.pm
   trunk/GT/DB/Text.pm
   trunk/GT/Prices.pm
Log:
Restore faster date formats. Reuse between DB::Text and DB::HTTP.

Modified: trunk/GT/DB/HTTP.pm
===================================================================
--- trunk/GT/DB/HTTP.pm	2008-03-22 06:02:29 UTC (rev 588)
+++ trunk/GT/DB/HTTP.pm	2008-03-23 04:39:32 UTC (rev 589)
@@ -26,10 +26,17 @@
 
 =head2 Configuration
 
-You must set some configuration items in ~/.gt/options, especially for authentification purpose.
+Most configuration items have default values. To alter these defaults
+you must indicate the configuration item and its value in your
+$HOME/.gt/options file, especially for authentication purposes.
 
 =over
 
+=item DB::module	HTTP
+
+Informs gt you are using the HTTP.pm module. This
+configuration item is always required in your $HOME/.gt/options file.
+
 =item DB::HTTP::url : The URL that will be requested to download
 
 =item DB::HTTP::location : The location of the server (www.geniustrader.org)
@@ -40,6 +47,58 @@
 
 =item DB::HTTP::password : The password (ie : anonymous)
 
+=item DB::HTTP::marker	string 
+
+Delimits fields in each row of the data file.
+The marker defaults to the tab character '\t'.
+
+=item DB::HTTP::header_lines	number
+
+The number of header lines in your data file
+that are to be skipped during processing. Lines with either the
+comment symbol '#' or the less than symbol '<' as the first character
+do not need to be included in this value. The header_lines default value is 0.
+
+=item DB::HTTP::format                0|1|2|3 (default is 3)
+The format of the date/time string. Valid values are: 
+0 - yyyy-mm-dd hh:nn:ss (the time string is optional)
+1 - US Format (month before day, any format understood by Date::Calc)
+2 - European Format (day before month, any format understood by Date::Calc)
+3 - Any format understood by Date::Manip
+
+=item DB::HTTP::fields::datetime	number
+
+Column index where to find the period datetime
+field. Indexes are 0 based.  For the particular case of datetime, it can contain
+multiple comma-separated indexes, useful when date and time are separate columns
+in the data file.  The accepted date time format is selected by DB::HTTP::format.
+A typical example would be YYYY-MM-DD HH:NN:SS. The default datetime index is 5.
+
+=item DB::HTTP::fields::open	number
+
+Column index where to find the period open field.
+Indexes are 0 based. The default open index is 0.
+
+=item DB::HTTP::fields::low	number
+
+Column index where to find the period low field.
+Indexes are 0 based. The default low index is 2. 
+
+=item DB::HTTP::fields::high	number
+
+Column index where to find the period high field.
+Indexes are 0 based. The default high index is 1.
+
+=item DB::HTTP::fields::close	number
+
+Column index where to find the period close field.
+Indexes are 0 based. The default close index is 3.
+
+=item DB::HTTP::fields::volume	number
+
+Column index where to find the period volume field.
+Indexes are 0 based. The default volume index is 4.
+
 =back
 
 You can set the DB::HTTP::directory configuration item to tell where
@@ -61,8 +120,28 @@
 
     GT::Conf::default("DB::HTTP::directory",
 		      GT::Conf::_get_home_path() . "/.gt/http-db-cache");
-    
+    GT::Conf::default('DB::HTTP::header_lines', '0');
+    GT::Conf::default('DB::HTTP::marker', "\t");
+    GT::Conf::default('DB::HTTP::file_extension', '.txt');
+    GT::Conf::default('DB::HTTP::format', '3');
+    GT::Conf::default('DB::HTTP::fields::datetime', '5');
+    GT::Conf::default('DB::HTTP::fields::open', '0');
+    GT::Conf::default('DB::HTTP::fields::low', '2');
+    GT::Conf::default('DB::HTTP::fields::high', '1');
+    GT::Conf::default('DB::HTTP::fields::close', '3');
+    GT::Conf::default('DB::HTTP::fields::volume', '4');
+
     my $self = { "directory" => GT::Conf::get("DB::HTTP::directory"),
+		 "header_lines" => GT::Conf::get('DB::HTTP::header_lines'),
+		 "mark" => GT::Conf::get('DB::HTTP::marker'),
+		 "date_format" => GT::Conf::get('DB::HTTP::format'),
+		 "extension" => GT::Conf::get('DB::HTTP::file_extension'),
+		 "datetime" => GT::Conf::get('DB::HTTP::fields::datetime'),
+		 "open" => GT::Conf::get('DB::HTTP::fields::open'),
+		 "low" => GT::Conf::get('DB::HTTP::fields::low'),
+		 "high" => GT::Conf::get('DB::HTTP::fields::high'),
+		 "close" => GT::Conf::get('DB::HTTP::fields::close'),
+		 "volume" => GT::Conf::get('DB::HTTP::fields::volume'),
                  "url" => GT::Conf::get("DB::HTTP::url"),
                  "location" => GT::Conf::get("DB::HTTP::location"),
                  "zone" => GT::Conf::get("DB::HTTP::zone"),
@@ -93,37 +172,6 @@
 }
 
 
-=item C<< $db->set_options($mark, $date_format, %fields) >>
-
-Set up all available options required to load text files.
-
-By default :
-
- - Mark is a tabulation ("\t")
- - Date Format
-    0 : GeniusTrader Date Format
-    1 : US sort of Date Format
-    2 : EU sort of Date Format
- - Fields Map
-     %fields = ('open' => 0, 'high' => 1, 'low' => 2, 'close' => 3,
-     %'volume' => 4, 'date' => 5);
-
-=cut
-sub set_options {
-    my ($self, $mark, $date_format, %fields) = @_;
-
-    if ($mark) { $self->{'mark'} = $mark; }
-    if ($date_format) {$self->{'date_format'} = $date_format; }
-    if (%fields) {
-	$self->{'open'} = $fields{'open'};
-	$self->{'high'} = $fields{'high'};
-	$self->{'low'} = $fields{'low'};
-	$self->{'close'} = $fields{'close'};
-	$self->{'volume'} = $fields{'volume'};
-	$self->{'date'} = $fields{'date'};
-    }
-}
-
 =item C<< $db->get_prices($code, $timeframe) >>
 
 Returns a GT::Prices object containing all known prices for the symbol $code.
@@ -131,7 +179,7 @@
 =cut
 sub get_prices {
     my ($self, $code, $timeframe) = @_;
-	$timeframe = $DAY unless ($timeframe);
+    $timeframe = $DAY unless ($timeframe);
     die "Intraday support not implemented in DB::HTTP" if ($timeframe < $DAY);
     return GT::Prices->new() if ($timeframe > $DAY);
 
@@ -139,23 +187,24 @@
     $prices->set_timeframe($timeframe);
 
     if (!$self->{'mark'}) { $self->{'mark'} = "\t"; }
-    if (!$self->{'date_format'}) { $self->{'date_format'} = 0; }
+    if (!$self->{'date_format'}) { $self->{'date_format'} = 3; }
+    if (!$self->{'header_lines'}) { $self->{'header_lines'} = 0; }
     if (!$self->{'open'}) { $self->{'open'} = 0; }
     if (!$self->{'high'}) { $self->{'high'} = 1; }
     if (!$self->{'low'}) { $self->{'low'} = 2; }
     if (!$self->{'close'}) { $self->{'close'} = 3; }
     if (!$self->{'volume'}) { $self->{'volume'} = 4; }
-    if (!$self->{'date'}) { $self->{'date'} = 5; }
+    if (!$self->{'date'}) { $self->{'datetime'} = 5; }
  
     my %fields = ('open' => $self->{'open'}, 'high' => $self->{'high'},
                   'low' => $self->{'low'}, 'close' => $self->{'close'},
-		  'volume' => $self->{'volume'}, 'date' => $self->{'date'});
+		  'volume' => $self->{'volume'}, 'date' => $self->{'datetime'});
     $self->{'fields'} = \%fields;
 
     my $file = $self->download_prices($code);
 		  
     $prices->loadtxt($file, $self->{'mark'}, $self->{'date_format'},
-		     %fields);
+		     $self->{'header_lines'}, %fields);
     return $prices;
 }
 

Modified: trunk/GT/DB/Text.pm
===================================================================
--- trunk/GT/DB/Text.pm	2008-03-22 06:02:29 UTC (rev 588)
+++ trunk/GT/DB/Text.pm	2008-03-23 04:39:32 UTC (rev 589)
@@ -14,7 +14,6 @@
 use GT::Prices;
 use GT::Conf;
 use GT::DateTime;
-use Date::Manip;
 
 =head1 DB::Text access module
 
@@ -29,21 +28,33 @@
 you must indicate the configuration item and its value in your
 $HOME/.gt/options file.
 
-DB::module	Text -- informs gt you are using the Text.pm module. This
+=over
+
+=item DB::module	Text
+
+Informs gt you are using the Text.pm module. This
 configuration item is always required in your $HOME/.gt/options file.
 
-DB::text::directory	path -- where files are stored. This
+=item DB::text::directory	path
+
+The directory where the data files are stored. This
 configuration item is always required in your $HOME/.gt/options file.
 
-DB::text::marker	string -- which delimits fields in each row of the data file.
+=item DB::text::marker	string 
+
+Delimits fields in each row of the data file.
 The marker defaults to the tab character '\t'.
 
-DB::Text::header_lines	number -- The number of header lines in your data file
+=item DB::text::header_lines	number
+
+The number of header lines in your data file
 that are to be skipped during processing. Lines with the either the
 comment symbol '#' or the less than symbol '<' as the first character
 do not need to be included in this value.. The header_lines default value is 0.
 
-DB::text::file_extension	string -- to be appended to the code file name when 
+=item DB::text::file_extension	string
+
+To be appended to the code file name when 
 searching the data file.  For instance, if the data file is called EURUSD.csv
 this variable would have the value '.csv' (without the quotes).
 
@@ -52,27 +63,46 @@
 if you have data in different timeframes, for instance, EURUSD_hour.csv and
 EURUSD_day.csv, use the following value for this directive:
 
-DB::text::file_extension	_$timeframe.csv
+=item DB::text::file_extension	_$timeframe.csv
 
-DB::text::fields::datetime	number -- Column index where to find the period datetime
+=item DB::text::format                0|1|2|3 (default is 3)
+The format of the date/time string. Valid values are: 
+0 - yyyy-mm-dd hh:nn:ss (the time string is optional)
+1 - US Format (month before day, any format understood by Date::Calc)
+2 - European Format (day before month, any format understood by Date::Calc)
+3 - Any format understood by Date::Manip
+
+=item DB::text::fields::datetime	number
+
+Column index where to find the period datetime
 field. Indexes are 0 based.  For the particular case of datetime, can contain
 multiple indexes, useful when date and time are separate columns in the data
 file.  The date time format is anything that can be understood by Date::Manip.
 A typical example would be YYYY-MM-DD HH:NN:SS. The default datetime index is 5.
 
-DB::text::fields::open	number -- Column index where to find the period open field.
+=item DB::text::fields::open	number
+
+Column index where to find the period open field.
 Indexes are 0 based. The default open index is 0.
 
-DB::text::fields::low	number -- Column index where to find the period low field.
+=item DB::text::fields::low	number
+
+Column index where to find the period low field.
 Indexes are 0 based. The default low index is 2. 
 
-DB::text::fields::high	number -- Column index where to find the period high field.
+=item DB::text::fields::high	number
+
+Column index where to find the period high field.
 Indexes are 0 based. The default high index is 1.
 
-DB::text::fields::close	number -- Column index where to find the period close field.
+=item DB::text::fields::close	number
+
+Column index where to find the period close field.
 Indexes are 0 based. The default close index is 3.
 
-DB::text::fields::volume	number -- Column index where to find the period volume field.
+=item DB::text::fields::volume	number
+
+Column index where to find the period volume field.
 Indexes are 0 based. The default volume index is 4.
 
 
@@ -91,6 +121,7 @@
     GT::Conf::default('DB::Text::header_lines', '0');
     GT::Conf::default('DB::Text::marker', "\t");
     GT::Conf::default('DB::Text::file_extension', '.txt');
+    GT::Conf::default('DB::Text::format', '3');
     GT::Conf::default('DB::Text::fields::datetime', '5');
     GT::Conf::default('DB::Text::fields::open', '0');
     GT::Conf::default('DB::Text::fields::low', '2');
@@ -100,6 +131,7 @@
 
     $self->{'header_lines'} = GT::Conf::get('DB::Text::header_lines');
     $self->{'mark'} = GT::Conf::get('DB::Text::marker');
+    $self->{'date_format'} = GT::Conf::get('DB::Text::format');
     $self->{'extension'} = GT::Conf::get('DB::Text::file_extension');
     $self->{'datetime'} = GT::Conf::get('DB::Text::fields::datetime');
     $self->{'open'} = GT::Conf::get('DB::Text::fields::open');
@@ -144,65 +176,34 @@
 
     return GT::Prices->new() if ($timeframe > $DAY);
 
-    my @datetime_fields = split(',',$self->{'datetime'});
-    my $datetime_fields_count = scalar(@datetime_fields);
-
     my $prices = GT::Prices->new;
     $prices->set_timeframe($timeframe);
 
+    if (!$self->{'mark'}) { $self->{'mark'} = "\t"; }
+    if (!$self->{'date_format'}) { $self->{'date_format'} = 3; }
+    if (!$self->{'header_lines'}) { $self->{'header_lines'} = 0; }
+    if (!$self->{'open'}) { $self->{'open'} = 0; }
+    if (!$self->{'high'}) { $self->{'high'} = 1; }
+    if (!$self->{'low'}) { $self->{'low'} = 2; }
+    if (!$self->{'close'}) { $self->{'close'} = 3; }
+    if (!$self->{'volume'}) { $self->{'volume'} = 4; }
+    if (!$self->{'datetime'}) { $self->{'datetime'} = 5; }
+ 
+    my %fields = ('open' => $self->{'open'}, 'high' => $self->{'high'},
+                  'low' => $self->{'low'}, 'close' => $self->{'close'},
+		  'volume' => $self->{'volume'}, 'date' => $self->{'datetime'});
+    $self->{'fields'} = \%fields;
+
     my $extension = $self->{'extension'};
     my $tfname = GT::DateTime::name_of_timeframe($timeframe);
     $extension =~ s/\$timeframe/$tfname/g;
 
     my $file = $self->{'directory'} . "/$code" . $extension;
 
-    #open(FILE, "<$file") || (warn "Can't open $file: $!\n" and return GT::Prices->new());
-    open(FILE, "<", "$file") || (warn "Can't open $file: $!\n" and return GT::Prices->new());
+    $prices->loadtxt($file, $self->{'mark'}, $self->{'date_format'},
+		     $self->{'header_lines'}, %fields);
+    return $prices;
 
-    my ($open, $high, $low, $close, $volume, $date);
-    my ($year, $month, $day);
-
-    my $lines_to_skip = $self->{'header_lines'};
-    
-    #TODO
-    #Date::Manip requires this to be defined
-    #there probably is a better way of doing this
-    #rather than defining it here, but it works
-    #for now
-    $ENV{'TZ'} = 'GMT' unless(defined($ENV{'TZ'})); 
-
-    # Process each line in $file...
-    while (defined($_=<FILE>))
-    {
-        # Skip user specified number of file header lines
-        if ( $lines_to_skip > 0 ) {
-            $lines_to_skip--;
-            next;
-        }
-        
-        next if (/^[#<]/); #Skip comments and METASTOCK ascii file header
-        # Get and split the line with $mark
-        chomp;
-        my @line = split($self->{'mark'});
-
-        # Get and swap all necessary fields according to the fields map
-        $open = $line[$self->{'open'}];
-        $high = $line[$self->{'high'}];
-        $low = $line[$self->{'low'}];
-        $close = $line[$self->{'close'}];
-        $volume = $line[$self->{'volume'}] or $volume = 0; #some datasets don't include volume
-        my $datetime=$line[$datetime_fields[0]];
-        for (my $i=1; $i<$datetime_fields_count;$i++) {
-             $datetime .= ' '.$line[$datetime_fields[$i]];
-        }
-        $date = &UnixDate($datetime, '%Y-%m-%d %H:%M:%S');
-
-        # Add all data within the GT::Prices object
-        $prices->add_prices([ $open, $high, $low, $close, $volume, $date ]);
-    }
-    close FILE;
-
-    return $prices;
 }
 
 =pod

Modified: trunk/GT/Prices.pm
===================================================================
--- trunk/GT/Prices.pm	2008-03-22 06:02:29 UTC (rev 588)
+++ trunk/GT/Prices.pm	2008-03-23 04:39:32 UTC (rev 589)
@@ -11,6 +11,7 @@
 #ALL#  use Log::Log4perl qw(:easy);
 use GT::DateTime;
 use GT::Serializable;
+use Date::Manip;
 
 require Exporter;
 @ISA = qw(Exporter GT::Serializable);
@@ -285,12 +286,14 @@
 
 =cut
 sub loadtxt {
-    my ($self, $file, $mark, $date_format, %fields) = @_;
+    my ($self, $file, $mark, $date_format, $skip, %fields) = @_;
 
     open(FILE, '<', "$file") || die "Can't open $file: $!\n";
+#   open(FILE, '<', "$file") || (warn "Can't open $file: $!\n" and return);
+
     $self->{'prices'} = [];
     my ($open, $high, $low, $close, $volume, $date);
-    my ($year, $month, $day);
+    my ($year, $month, $day, $tm);
 
     # Initialize all options with the default settings
     # Set up $mark as a tabulation
@@ -307,9 +310,20 @@
     # Process each line in $file...
     while (defined($_=<FILE>))
     {
+        # Skip user specified number of file header lines
+        if ( $skip > 0 ) {
+            $skip--;
+            next;
+        }
+        
 	# ... only if it's a line without strings (ie: everything but head line)
-	if (!/\G[A-Za-z]/gc) {
+        next if (/^[#<]/); #Skip comments and METASTOCK ascii file header
+        #next if (/\G[A-Za-z]/gc);  #Skip all lines containing text strings
+        #NOTE: The first does not skip typical headers; the second does
+        #      not allow textual dates.
 
+	if (!/date/ig) {
+
 	    # Get and split the line with $mark
 	    chomp;
 	    my @line = split("$mark");
@@ -319,14 +333,20 @@
 	    $high = $line[$fields{'high'}];
 	    $low = $line[$fields{'low'}];
 	    $close = $line[$fields{'close'}];
-	    $volume = $line[$fields{'volume'}];
-	    $date = $line[$fields{'date'}];
+	    $volume = $line[$fields{'volume'}] || 0;
+	    my @datetime_fields = split(',',$fields{'date'});
+	    my $datetime_fields_count = scalar(@datetime_fields);
+	    my $date=$line[$datetime_fields[0]];
+	    for (my $i=1; $i<$datetime_fields_count;$i++) {
+	      $date .= ' '.$line[$datetime_fields[$i]];
+	    }
 
 	    # Decode the date from the text file to something useable
 		# The hh:nn:ss part is optional
 	    # $date_format eq 0 : GeniusTrader Date Format (yyyy-mm-dd hh:nn:ss)
-	    # $date_format eq 1 : US sort of Date Format   (mm/dd/yyyy)
-	    # $date_format eq 2 : EU sort of Date Format   (dd/mm/yyyy)
+	    # $date_format eq 1 : US sort of Date Format   (month before day)
+	    # $date_format eq 2 : EU sort of Date Format   (day before month)
+	    # $date_format eq 3 : Any format understood by Date::Manip
 	    
 	    if ($date_format != 0) {
 		
@@ -336,13 +356,35 @@
 		if ($date_format eq 2) {
 		    ($year, $month, $day) = Decode_Date_EU($date);
 		}
+		if ($date_format eq 3) {
+		  #Date::Manip requires this to be defined
+		  #there probably is a better way of doing this
+		  #rather than defining it here, but it works
+		  #for now
+		  $ENV{'TZ'} = 'GMT' unless(defined($ENV{'TZ'})); 
+		  my $udate = &UnixDate($date, '%Y-%m-%d %H:%M:%S');
+		  unless (defined $udate) {
+		    warn "Incorrect date for format $date_format: $date.\n";
+		    next;
+		  }
+		  ( $year, $month, $day, $tm ) = split /[- ]/, $udate;
+		}
+		unless (defined $year) {
+		  warn "Incorrect date for format $date_format: $date.\n";
+		  next;
+		}
 		my ($today_year, $today_month, $today_day) = Today();
 		if ($year > $today_year) {
 		    $year -= 100;
 		}
-		$month = "0" . $month if $month < 10;
-		$day = "0" . $day if $day < 10;
-		$date = $year . "-" . $month . "-" .$day;
+		# Time::Local only works for dates within 50 years
+		next if $year <= $today_year - 50;
+		unless ($date_format eq 3) {
+		  $month = '0' . $month if $month < 10;
+		  $day = '0' . $day if $day < 10;
+		}
+		$date = $year . '-' . $month . '-' .$day;
+		$date .= " $tm" if $tm;
 	    }
 
 	    # Add all data within the GT::Prices object