package ScatterPlot; #use 5.008001; use strict; use warnings; require Exporter; our @ISA = qw(Exporter); # Items to export into callers namespace by default. Note: do not export # names by default without a very good reason. Use EXPORT_OK instead. # Do not simply export all your public functions/methods/constants. # This allows declaration use SCatterPlot ':all'; # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK # will save memory. our %EXPORT_TAGS = ( 'all' => [ qw( ) ] ); our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); our @EXPORT = qw( ); our $VERSION = '0.01'; # constructor sub new { # declare the class name and assign it the input parameter my ($class_name) = @_; # create the new variable, its a hash my ($self) = []; # bless it to be an object within class $class_name bless ($self, $class_name); # return the hash and exit return $self; } # draw an ASCII plot sub draw { # declare local copy of self and sport, assigning with input paramters my ($self, $xy_points, $x_size, $y_size, $x_label, $y_label, $char, $type, $reg_calc) = @_; # default variables unless ($type) { $type = 'text'; } unless ($char) { $char = 'o'; } unless ($y_label) { $y_label = ''; } unless ($x_label) { $x_label = ''; } unless ($y_size) { $y_size = 26; } else { $y_size--; } unless ($x_size) { $x_size = 60; } unless ($xy_points) { my @xy_points = (); my $i_max = 20; for (my $i=0; $i<$i_max; $i++) { $xy_points[$i][0] = ($i - ($i_max - 1) / 2 ) * 6 / $i_max; $xy_points[$i][1] = ($xy_points[$i][0] + 2) * ($xy_points[$i][0] - 2) * $xy_points[$i][0]; } $xy_points = \@xy_points; } unless ($reg_calc) { $reg_calc = 0; } # find the number of points to plot my $num_points = @$xy_points; # loop thru the points and find min/max values my $x_min = $$xy_points[0][0]; my $x_max = $$xy_points[0][0]; my $y_min = $$xy_points[0][1]; my $y_max = $$xy_points[0][1]; for (my $j=1; $j<$num_points; $j++) { if (($$xy_points[$j][0]||$$xy_points[$j][0]==0) and ($$xy_points[$j][1]||$$xy_points[$j][1]==0)) { if ($$xy_points[$j][0] < $x_min) { $x_min = $$xy_points[$j][0]; } if ($$xy_points[$j][0] > $x_max) { $x_max = $$xy_points[$j][0]; } if ($$xy_points[$j][1] < $y_min) { $y_min = $$xy_points[$j][1]; } if ($$xy_points[$j][1] > $y_max) { $y_max = $$xy_points[$j][1]; } } } # calculate the scale and offset value my $slopish=($y_max-$y_min)/($x_max-$x_min+.0000000001); my $x_scale = 0; if ($x_max - $x_min != 0) { $x_scale = $x_size / ($x_max - $x_min); } my $x_offset = -$x_min; my $y_scale = 0; if ($y_max - $y_min != 0) { $y_scale = $y_size / ($y_max - $y_min); } #$y_scale*=int($slopish); my $y_offset = -$y_min; # clear the graph for (my $x=0; $x<$x_size; $x++) { for (my $y=0; $y<=$y_size; $y++) { $$self[$x][$y] = ' '; } } # draw the axes my $x_axis = int($x_scale * $x_offset); my $y_axis = int($y_scale * $y_offset); if (($y_axis >= 0) and ($y_axis < $y_size)) { for (my $x=0; $x<$x_size; $x++) { $$self[$x][$y_axis] = '-'; } } if (($x_axis >= 0) and ($x_axis < $x_size)) { for (my $y=0; $y<$y_size; $y++) { $$self[$x_axis][$y] = '|'; } } if (($x_axis >= 0) and ($x_axis < $x_size) and($y_axis >= 0) and ($y_axis < $y_size)) { $$self[$x_axis][$y_axis] = '+'; } # plot the points for (my $i=0; $i<$num_points; $i++) { my $x_pos = 0; if ($$xy_points[$i][0]||$$xy_points[$i][0]==0) { $x_pos = int($x_scale * ($$xy_points[$i][0] + $x_offset)); } if ($x_pos < 0) { $x_pos = 0; } elsif ($x_pos > $x_size - 1) { $x_pos = $x_size - 1; } my $y_pos = 0; if ($$xy_points[$i][1]||$$xy_points[$i][1]==0) { $y_pos = int($y_scale * ($$xy_points[$i][1] + $y_offset)); } if ($y_pos < 0) { $y_pos = 0; } elsif ($y_pos > $y_size - 1) { $y_pos = $y_size - 1; } if (($$xy_points[$i][0]||$$xy_points[$i][0]==0) and ($$xy_points[$i][1]||$$xy_points[$i][1]==0)) { $$self[$x_pos][$y_pos] = $char; } } ############################################### Louis my @xvalues; #Values from column 1 of the file my @yvalues; #Values from column 2 of the file my $correlation; #The "r" value my $slope; #Indicates rate of y/x my $intercept; #Regressed value of y at x=0 my $sign; #For printing the f(x)=mx+b equation my $countx=0; #For iterating through the file my @scatter_array; #Populate the two arrays containing the x and y values for (my $i=0; $i<$num_points; $i++) { if (($$xy_points[$i][0])||($$xy_points[$i][0] == 0)) { $xvalues[$i] = $$xy_points[$i][0]; } if (($$xy_points[$i][1])||($$xy_points[$i][1] == 0)) { $yvalues[$i] = $$xy_points[$i][1]; } } #Return the total from one array sub total () { my ($z_ref) = @_; my $total=0; my $value=0; foreach $value (@$z_ref) { $total+=$value; } return $total; } #Return the mean from one array [requires &total] sub mean () { my ($z_ref) = @_; my $total=&total($z_ref); return $total/(@$z_ref); } #Return the variance from one array [requires &mean] sub variance () { my ($z_ref) = @_; my $numerator=0; my $mean=&mean($z_ref); foreach my $value (@$z_ref) { $numerator+=($value-$mean)**2; } return $numerator/($#$z_ref); } #Return the standard deviation from one array [requires &variance] sub stdDeviation () { my ($z_ref) = @_; return sqrt(&variance($z_ref))+.000000001; #Avoiding divide by zero with the result } #Return the correlation between two arrays [requires &mean, &stdDeviation] sub correlation () { my ($x_ref, $y_ref) = @_; my $corrsub=0; my $meanx=&mean($x_ref); my $meany=&mean($y_ref); my $devx=&stdDeviation($x_ref); my $devy=&stdDeviation($y_ref); for (my $i=0; $i<@$x_ref; $i++) { $corrsub+=(($x_ref->[$i]-$meanx)/$devx)*(($y_ref->[$i]-$meany)/$devy); } if ($devy<.0001) { return 1; } else { return $corrsub/($#$x_ref); } } #Return the regressed slope of two arrays [requires &correlation, &stdDeviation] sub getSlope () { my ($x_ref, $y_ref) = @_; return &correlation($x_ref, $y_ref)*(&stdDeviation($y_ref)/&stdDeviation($x_ref)); } #Return the regressed y intercept of two arrays [requires &getSlope, &mean] sub getIntercept () { my ($x_ref, $y_ref) = @_; return &mean($y_ref)-(&getSlope($x_ref, $y_ref) * &mean($x_ref)); } $slope=&getSlope(\@xvalues, \@yvalues); $intercept=&getIntercept(\@xvalues, \@yvalues); $correlation=&correlation(\@xvalues, \@yvalues); my $correlation_type; my $correlation_strength; if ($correlation<0) { $correlation_type="negative"; } elsif ($correlation>0) { $correlation_type="positive"; } else { $correlation_type="nil"; } if (abs($correlation)>.99999) { $correlation_strength=" and perfectly correlated"; } elsif (abs($correlation)>0.99) { $correlation_strength=" and almost perfectly correlated"; } elsif (abs($correlation)>0.90) { $correlation_strength=" and very strongly correlated"; } elsif (abs($correlation)>0.70) { $correlation_strength=" and strongly correlated"; } elsif (abs($correlation)>0.50) { $correlation_strength=" and moderately correlated"; } elsif (abs($correlation)>0.30) { $correlation_strength=" and weakly correlated"; } elsif (abs($correlation)>0.20) { $correlation_strength=" and neglibily correlated"; } else { $correlation_type=""; $correlation_strength="not correlated"; } if ($intercept<0) { $sign="-"; }else{ $sign="+"; } # plot the regression line if ($reg_calc) { for (my $i=$x_min; $i< $x_max; $i+=(1/$x_scale)) { my $x_pos; $x_pos = int($x_scale * ($i + $x_offset)); if ($x_pos < 0) { $x_pos = 0; } elsif ($x_pos > $x_size - 1) { $x_pos = $x_size - 1; } my $y_pos = 0; $y_pos = int($y_scale * ($i*$slope +$intercept + $y_offset)); if ($y_pos < 0) { $y_pos = 0; } elsif ($y_pos > $y_size - 1) { $y_pos = $y_size - 1; } if ($$self[$x_pos][$y_pos] eq '.') { $$self[$x_pos][$y_pos] = '.'; } elsif ($$self[$x_pos][$y_pos] eq '*') { $$self[$x_pos][$y_pos] = '*'; } elsif ($$self[$x_pos][$y_pos] eq $char) { $$self[$x_pos][$y_pos] = '*'; } else { $$self[$x_pos][$y_pos] = '.'; } } } ############################################### Louis # add the axes limits # left label my $y_pos = 1; if ($y_axis < 1) { $y_pos = 1; } elsif ($y_axis > $y_size) { $y_pos = $y_size; } else { $y_pos = $y_axis; } my $label = sprintf("%0.1f ", $x_min); my $l = length($label); for (my $i=0; $i<$l; $i++) { $$self[$i][$y_pos] = substr($label, $i, 1); } # right label $label = sprintf(" %0.1f", $x_max); $l = length($label); my $x_label_pos = $x_size - $l; for (my $i=0; $i<$l; $i++) { $$self[$x_label_pos+$i][$y_pos] = substr($label, $i, 1); } # bottom label $label = sprintf("%0.1f", $y_min); $l = length($label); my $y_label_pos = $x_axis - int($l/2); if ($y_label_pos < 0) { $y_label_pos = 0; } elsif ($y_label_pos + $l > $x_size) { $y_label_pos = $x_size - $l; } for (my $i=0; $i<$l; $i++) { $$self[$y_label_pos+$i][0] = substr($label, $i, 1); } # top label $label = sprintf("%0.1f", $y_max); $l = length($label); $y_label_pos = $x_axis - int($l/2); if ($y_label_pos < 0) { $y_label_pos = 0; } elsif ($y_label_pos + $l > $x_size) { $y_label_pos = $x_size - $l; } for (my $i=0; $i<$l; $i++) { $$self[$y_label_pos+$i][$y_size-1] = substr($label, $i, 1); } # add the labels # x label $l = length($x_label); $x_label_pos = $x_size - $l; for (my $i=0; $i<$l; $i++) { $$self[$x_label_pos+$i][$y_pos+1] = substr($x_label, $i, 1); } # y label $l = length($y_label); $y_label_pos = $x_axis - int($l/2); if ($y_label_pos < 0) { $y_label_pos = 0; } elsif ($y_label_pos + $l > $x_size) { $y_label_pos = $x_size - $l; } for (my $i=0; $i<$l; $i++) { $$self[$y_label_pos+$i][$y_size] = substr($y_label, $i, 1); } # print the $self my $prefix = "\n"; my $newline = "\n"; my $postfix = "\n"; if (($type eq 'html') or ($type eq 'HTML')) { $prefix = "
\n"; $newline = "\n"; $postfix = "\n"; } print $prefix; for (my $y=$y_size; $y>=0; $y--) { for (my $x=0; $x<$x_size; $x++) { print $$self[$x][$y]; } print $newline; } print $postfix; if ($reg_calc) { printf ("Linear regression forumula: f(x)=%gx +%.4f\n", $slope, $intercept); printf (" R-value: %g\n", $correlation); printf (" R^2-value: %g\n", $correlation**2); printf ("\n\nSummary:\n"); printf("The function f(x)=mx+b represented by this data is: f(x)=%gx %s%.4f\n", $slope, $sign, abs($intercept)); printf("The x intercept is: %g\n", (-1)*(abs($intercept))/$slope); printf("The correlation between the %g records in this set is: %g\n", scalar @xvalues, $correlation); printf("This correlation is defined as %s%s.\n\n", $correlation_type, $correlation_strength); printf("The coefficient of determination (r^2 value) of these records is: %g\n", $correlation**2); printf("This means that %.1f%% of these records can be explained by the above equation.\n\n", 100*($correlation**2)); } return 1; } 1; __END__ =head1 NAME ScatterPlot - Perl extension for drawing ASCII scatter plots =head1 SYNOPSIS use ScatterPlot; =head1 DESCRIPTION This module will draw a quick and easy ASCII scatter plot. It has only two functions, new() and draw(). new() takes no arguments and creates a new ScatterPlot object. draw() can be called with no arguments to draw a sample test plot. You can call draw like this: draw($xy_points); where $xy_points is a reference to an array of (x,y) pairs. See the file ScatterPlot.pl for an example. The full call to draw is: draw($xy_points, $x_size, $y_size, $x_label, $y_label, $char, $type, $reg_line); where $xy_points is a reference to an array of (x,y) pairs, $x_size is an integer describing the width of the plot in characters, $y_size is an integer describing the height of the plot in characters, $x_label is a string for the horizontal axis label, $y_label is a string for the vertical axis lable, $char is the plot character, and $type is either 'text', 'html', or 'HTML'. If you are using CGI or sending the plot output to a web page, then use $type='html' or $type='HTML'. The method draw() will automatically scale the plot to fit your data and draw the axes labels accordingly. The size of the output text will be $y_size lines of text, each of which is $x_size long in characters (plus line terminator). In text mode the plot begins with "\n" and ends with "\n", while in html mode the plot begins with "
" and ends with "<\pre>". =head2 EXPORT none =head1 SEE ALSO The example file ScatterPlot.pl contains an example of how to use the ScatterPlot module. =head1 AUTHORS Les Hall, Einventor-66@comcast.netE Louis Romero, E louis_romero@hotmail.comE =head1 COPYRIGHT AND LICENSE Copyright 2007 by Les Hall This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut