#!/usr/bin/perl # Copyright (C) 2008 by BBN Technologies and University of Maryland (UMD) # BBN and UMD grant a nonexclusive, source code, royalty-free right to # use this Software known as Correlation Calculator (the # "Software") solely for research purposes. Provided, you must agree # to abide by the license and terms stated herein. Title to the # Software and its documentation and all applicable copyrights, trade # secrets, patents and other intellectual rights in it are and remain # with BBN and UMD and shall not be used, revealed, disclosed in # marketing or advertisement or any other activity not explicitly # permitted in writing. # BBN and UMD make no representation about suitability of this # Software for any purposes. It is provided "AS IS" without express # or implied warranties including (but not limited to) all implied # warranties of merchantability or fitness for a particular purpose. # In no event shall BBN or UMD be liable for any special, indirect or # consequential damages whatsoever resulting from loss of use, data or # profits, whether in an action of contract, negligence or other # tortuous action, arising out of or in connection with the use or # performance of this Software. # Without limitation of the foregoing, user agrees to commit no act # which, directly or indirectly, would violate any U.S. law, # regulation, or treaty, or any other international treaty or # agreement to which the United States adheres or with which the # United States complies, relating to the export or re-export of any # commodities, software, or technical data. This Software is licensed # to you on the condition that upon completion you will cease to use # the Software and, on request of BBN and UMD, will destroy copies of # the Software in your possession. my ($show_minmax, $show_systems, $use_weighted, $show_fit) = (0, 0, 0, 0); my @pARGV = (); foreach my $a (@ARGV) { if ($a =~ /^-(.+)$/) { my @v = split(//, $1); foreach my $b (@v) { if ($b eq "m") { $show_minmax = 1; } elsif ($b eq "s") { $show_systems = 1; } elsif ($b eq "f") { $show_fit = 1; } elsif ($b eq "F") { $show_fit = 2; } elsif ($b eq "w") { $use_weighted = 1; } elsif ($b eq "h") { usage(); } else { print "Unknown option -$b\n"; usage(); } } } else { push @pARGV, $a; } } usage() unless (($#pARGV >= 2) && ($#pARGV <= 3)); my ($adcorr, $swgt, $prefix, $segfitfname) = @pARGV; %SEGAD; %DOCAD; %SYSAD; %SEGSC; %DOCSC; %SYSSC; %SEGWGT; %DOCWGT; print "Loading segment weights from $swgt\n"; load_swgts($swgt); print "Loading judgment scores from $adcorr\n"; load_ad_scores($adcorr); my @seg_files = glob($prefix . '*.seg.scr'); my @doc_files = glob($prefix . '*.doc.scr'); my @sys_files = glob($prefix . '*.sys.scr'); print "Loading automatic scores\n"; load_seg_scores(@seg_files); load_doc_scores(@doc_files); load_sys_scores(@sys_files); if ($show_fit == 1) { print "Linear fit is EVAL = a + b*HJ\n"; } elsif ($show_fit == 2) { print "Linear fit is HJ = a + b*EVAL\n"; } if (defined $segfitfname) { print "Outputing segment fit values to $segfitfname\n"; output_fit($segfitfname, \%SEGAD, \%SEGSC); } if ($use_weighted) { output_corr("SEG", \%SEGAD, \%SEGSC, 1, \%SEGWGT); output_corr("DOC", \%DOCAD, \%DOCSC, 0, \%DOCWGT); } else { output_corr("SEG", \%SEGAD, \%SEGSC, 1); output_corr("DOC", \%DOCAD, \%DOCSC, 0); } output_corr("SYS", \%SYSAD, \%SYSSC, 0); sub usage { print("calc_corr.pl [-hmsfFw] [segfitfname]\n", " if segfitfname is specified then the segment level fit values will be written \n", " to that file, where line is of the form: \n", " -h : Show this help message\n", " -m : Show min and max indiviual systems\n", " -s : Show all individual systems\n", " -f : Show linear fit equation (EVAL = a + b*HJ)\n", " -F : Show linear fit equation, but reverse it so that we have (HJ = a + b*EVAL)\n", " -w : Show weighted segment and document correlations (pearson only)\n", ); exit(-1); } sub output_fit { my ($fname, $had, $hsc) = @_; my @allsys_ad = (); my @allsys_sc = (); my @allsys_id = (); my $max_k = 0; foreach my $k (sort keys %{$had}) { if (! (exists $hsc->{$k})) { print "WARNING. Not automatic score found for $k\n"; } else { push @allsys_ad, $had->{$k}; push @allsys_sc, $hsc->{$k}; my $lk = $k; $lk =~ s/^([^,]+),([^,]+),([^,]+)$/[$1][$2][$3]/; $max_k = length($lk) if (length($lk) > $max_k); push @allsys_id, $lk; } } return if ($#allsys_ad < 0); my ($fit_a, $fit_b) = (0.0, 0.0); if ($show_fit == 2) { ($fit_a, $fit_b) = MattStat::linear_fit(\@allsys_sc, \@allsys_ad); } else { ($fit_a, $fit_b) = MattStat::linear_fit(\@allsys_ad, \@allsys_sc); } open(OFH, ">", $fname) or die("Cannot open $fname for writing"); foreach my $i (0..$#allsys_id) { my $targ = 0.0; if ($show_fit == 2) { $targ = ($fit_b * $allsys_sc[$i]) + $fit_a; } else { $targ = ($fit_b * $allsys_ad[$i]) + $fit_a; } printf(OFH "%-${max_k}s %8.3f %8.3f %8.3f\n", $allsys_id[$i], $allsys_ad[$i], $allsys_sc[$i], $targ); } close(OFH); } sub output_corr { my ($type, $had, $hsc, $show_header, $WGTS) = @_; my @allsys_ad = (); my @allsys_sc = (); my @allsys_wgts = (); my %sys_ad = (); my %sys_sc = (); my %sys_wgts = (); foreach my $k (sort keys %{$had}) { if (! (exists $hsc->{$k})) { print "WARNING. Not automatic score found for $k\n"; } else { push @allsys_ad, $had->{$k}; push @allsys_sc, $hsc->{$k}; my ($s, @rest) = split(/,/, $k); push @{$sys_ad{$s}}, $had->{$k}; push @{$sys_sc{$s}}, $hsc->{$k}; if (defined $WGTS) { my $wk = join(",", @rest); push @allsys_wgts, $WGTS->{$wk}; push @{$sys_wgts{$s}}, $WGTS->{$wk}; } } } return if ($#allsys_ad < 0); # print "$type Scores\n------------\n" . join("\n", @allsys_sc) . "\n\n"; # print "$type Adequacy\n------------\n" . join("\n", @allsys_ad) . "\n\n"; my $formath = "%-4s %-12s %-9s %15s %-9s %15s %-8s %4s\n"; my $format = "%-4s %-12s %8.5f%1s (%6.3f %6.3f) %8.3f%1s (%6.3f %6.3f) %8.3f %4d\n"; my @fields = ("Type", "System", "Pearson", "95%-Interval", "Spearman", "95%-Interval", "Tao", "N"); if ($show_fit) { $formath = "%-4s %-12s %-9s %15s %-9s %15s %-8s %-8s %-8s %4s\n"; $format = "%-4s %-12s %8.5f%1s (%6.3f %6.3f) %8.3f%1s (%6.3f %6.3f) %8.3f %8.3f %8.3f %4d\n"; @fields = ("Type", "System", "Pearson", "95%-Interval", "Spearman", "95%-Interval", "Tao", "Fit-A", "Fit-B", "N"); } printf($formath, @fields) if ($show_header); my $pear, $pear_sig, $pear_min, $pear_max; my ($fit_a, $fit_b) = (0.0, 0.0); if ($show_fit == 1) { ($fit_a, $fit_b) = MattStat::linear_fit(\@allsys_ad, \@allsys_sc); } elsif ($show_fit == 2) { ($fit_a, $fit_b) = MattStat::linear_fit(\@allsys_sc, \@allsys_ad); } if ($#allsys_wgts > 0) { ($pear, $pear_sig, $pear_min, $pear_max) = my_interval("weighted_pearson", \@allsys_ad, \@allsys_sc, 0.95, \@allsys_wgts); } else { ($pear, $pear_sig, $pear_min, $pear_max) = my_interval("pearson", \@allsys_ad, \@allsys_sc, 0.95); } my ($spear, $spear_sig, $spear_min, $spear_max) = my_interval("spearman", \@allsys_ad, \@allsys_sc, 0.95); my $tao = MattStat::tao(\@allsys_ad, \@allsys_sc); my $N = $#allsys_ad + 1; $pear_sig = ($pear_sig ? "" : "*"); $spear_sig = ($spear_sig ? "" : "*"); my @rfields = ($pear, $pear_sig, $pear_min, $pear_max, $spear, $spear_sig, $spear_min, $spear_max, $tao); if ($show_fit) { push @rfields, $fit_a, $fit_b; } push @rfields, $N; printf($format, $type, "ALL", @rfields); my $min_sys = 0; my $max_sys = 0; my $min_r = 2.0; my $max_r = -2.0; my @sys_outs; foreach my $s (sort keys %sys_ad) { next if ($#{$sys_ad{$s}} < 1); my ($pear, $pear_sig, $pear_min, $pear_max); my ($fit_a, $fit_b) = (0.0, 0.0); if ($show_fit == 1) { ($fit_a, $fit_b) = MattStat::linear_fit($sys_ad{$s}, $sys_sc{$s}); } elsif ($show_fit == 2) { ($fit_a, $fit_b) = MattStat::linear_fit($sys_sc{$s}, $sys_ad{$s}); } if ($#{$sys_wgts{$s}} > 0) { ($pear, $pear_sig, $pear_min, $pear_max) = my_interval("weighted_pearson", $sys_ad{$s}, $sys_sc{$s}, 0.95, $sys_wgts{$s}); } else { ($pear, $pear_sig, $pear_min, $pear_max) = my_interval("pearson", $sys_ad{$s}, $sys_sc{$s}, 0.95); } my ($spear, $spear_sig, $spear_min, $spear_max) = my_interval("spearman", $sys_ad{$s}, $sys_sc{$s}, 0.95); my $tao = MattStat::tao($sys_ad{$s}, $sys_sc{$s}); my $N = ($#{$sys_ad{$s}} + 1); $pear_sig = ($pear_sig ? "" : "*"); $spear_sig = ($spear_sig ? "" : "*"); my @rfields = ($pear, $pear_sig, $pear_min, $pear_max, $spear, $spear_sig, $spear_min, $spear_max, $tao); if ($show_fit) { push @rfields, $fit_a, $fit_b; } push @rfields, $N; my $out_s = sprintf($format, "", $s, @rfields); $out_s =~ s/\n$//; # remove final newline push @sys_outs, $out_s; if (abs($pear) < $min_r) { $min_r = abs($pear); $min_sys = $#sys_outs; } if (abs($pear) > $max_r) { $max_r = abs($pear); $max_sys = $#sys_outs; } } return if ($#sys_outs < 0); if ($show_systems) { foreach my $i (0..$#sys_outs) { my $tail = ""; if ($show_minmax) { $tail .= " (min)" if ($i == $min_sys); $tail .= " (max)" if ($i == $max_sys); } $tail .= "\n"; print $sys_outs[$i] . $tail; } } elsif ($show_minmax) { print $sys_outs[$min_sys] . " (min)\n"; print $sys_outs[$max_sys] . " (max)\n"; } } sub my_interval { my ($name, $arr1, $arr2, $conf, $wgts) = @_; my $N = $#{$arr1} + 1; if ($name eq "pearson") { if ($N <= 3) { my $r = MattStat::pearson($arr1, $arr2); my $sg = MattStat::corr_signif($r, $N); return ($r, $sg, -1.0, 1.0); } else { return MattStat::pearson_int($arr1, $arr2, $conf); } } elsif ($name eq "weighted_pearson") { if ($N <= 3) { my $r = MattStat::weighted_pearson($arr1, $arr2, $wgts); my $sg = MattStat::corr_signif($r, $N); return ($r, $sg, -1.0, 1.0); } else { return MattStat::weighted_pearson_int($arr1, $arr2, $wgts, $conf); } } elsif ($name eq "spearman") { if ($N <= 3) { my $r = MattStat::spearman($arr1, $arr2); my $sg = MattStat::corr_signif($r, $N); return ($r, $sg, -1.0, 1.0); } else { return MattStat::spearman_int($arr1, $arr2, $conf); } } } sub load_seg_scores { my (@files) = @_; foreach my $f (@files) { open(FH, "<", $f) or die("Cannot open $f for reading."); while() { chomp; my @v = split(/\t/); my ($set, $sys, $doc, $seg, $sc) = @v; my $k = join(",", $sys, $doc, $seg); if (exists $SEGSC{$k}) { print "WARNING. Multiple segment scores for $k\n"; } $SEGSC{$k} = $sc; } close(FH); } } sub load_doc_scores { my (@files) = @_; foreach my $f (@files) { open(FH, "<", $f) or die("Cannot open $f for reading."); while() { chomp; my @v = split(/\t/); my ($set, $sys, $doc, $sc) = @v; my $k = join(",", $sys, $doc); if (exists $DOCSC{$k}) { print "WARNING. Multiple doc scores for $k\n"; } $DOCSC{$k} = $sc; } close(FH); } } sub load_sys_scores { my (@files) = @_; foreach my $f (@files) { open(FH, "<", $f) or die("Cannot open $f for reading."); while() { chomp; my @v = split(/\t/); my ($set, $sys, $sc) = @v; my $k = join(",", $sys); if (exists $SYSSC{$k}) { print "WARNING. Multiple system scores for $k\n"; } $SYSSC{$k} = $sc; } close(FH); } } sub load_ad_scores { my ($adcorr) = @_; open(FH, "<", $adcorr) or die("Cannot open $adcorr for reading.\n"); my $first = 1; my %DOCCT; my %SYSCT; while () { if ($first) { $first = 0; next; } chomp; my @v = split(/,/); my ($sysid, $doc, $seg, $score, $scoreQual) = (@v[0..2], @v[5..6]); my $k = join(",", $sysid, $doc, $seg); if (exists $SEGAD{$k}) { print "WARNING. Multiple scores for $k\n"; } $SEGAD{$k} = $score; my $dk = join(",", $sysid, $doc); my $wk = join(",", $doc, $seg); $DOCAD{$dk} += $score * $SEGWGT{$wk}; $DOCCT{$dk} += $SEGWGT{$wk}; $SYSAD{$sysid} += $score * $SEGWGT{$wk}; $SYSCT{$sysid} += $SEGWGT{$wk}; } close(FH); foreach my $k (keys %SYSCT) { $SYSAD{$k} /= $SYSCT{$k}; } foreach my $k (keys %DOCCT) { $DOCAD{$k} /= $DOCCT{$k}; } } sub load_swgts { my ($swgt) = @_; open(FH, "<", $swgt) or die("Cannot open $swgt for reading"); while() { chomp; next if (m/^\s*$/); my ($doc, $seg, $wgt) = split(/\t/); my $k = join(",", $doc, $seg); $SEGWGT{$k} = $wgt; $DOCWGT{$doc} += $wgt; } close(FH); } package MattStat; use strict; sub log10 { my $n = shift; return log($n)/log(10); } sub r_conf_interval { my ($r, $N, $conf) = @_; my $absr = abs($r); if (($absr >= 1.0) && ($absr <= 1.001)) { $absr = 1.0; if($r < 0) { $r = -1.0; } else { $r = 1.0; } } die("Invalid value of r=$r ($absr) for pearson confidence interval") unless ($absr <= 1.0); die("Invalid value of N=$N (must be greater than 3) for pearson confidence interval") unless ($N > 3); my $zp = fisher_r_to_z($r); # my $z = 2.58; # for 99% my $z = 1.96; # for 95% $z = lookup_z_for_int($conf) if (defined $conf); my $in = $z * (1 / sqrt($N - 3)); return (fisher_z_to_r($zp - $in), fisher_z_to_r($zp + $in)); } sub fisher_z_to_r { my $z = shift; return (exp(2.0*$z)-1)/(exp(2.0*$z)+1) } sub fisher_r_to_z { my $r = shift; $r = 0.9999999999 if ($r == 1.0); $r = -0.9999999999 if ($r == -1.0); die ("Invalid value of r=$r for fisher r to z transformation.") unless (abs($r) < 1.0); return (0.5) * (log(1+$r) - log(1-$r)); } # directional (one-sided) sub corr_signif_d { my ($r, $N, $conf) = @_; $conf = 0.95 unless (defined $conf); my $df = $N-2; my $td = abs($r / (sqrt((1-($r**2)) / ($N / 2)))); my $t = os_ttable_lookup($df, $conf); return 1 if ($t <= $td); return 0; } # non-directional (two-sided) sub corr_signif_nd { my ($r, $N, $conf) = @_; $conf = 0.95 unless (defined $conf); my $df = $N-2; return 0 if ($N == 0); return 1 if (($r**2) == 1); my $den = (1-($r**2)) / ($N / 2); $den = abs($den) if ($den < 0); my $td = abs($r / (sqrt($den))); my $t = ts_ttable_lookup($df, $conf); return 1 if ($t <= $td); return 0; } sub corr_signif { my ($r, $N, $conf) = @_; return corr_signif_nd($r, $N, $conf); } sub linear_fit { # find fit for line: y = a + b*x # return (a, b) my ($x_arr, $y_arr) = @_; my ($mean_x, $mean_y) = (mean($x_arr), mean($y_arr)); my $n = ($#{$x_arr} + 1); my $b = 0.0; my $den = 0.0; foreach my $i (0..($n-1)) { $b += ($x_arr->[$i] * $y_arr->[$i]); $den += ($x_arr->[$i]**2); } $b -= ($n * $mean_x * $mean_y); $den -= ($n * ($mean_x ** 2)); $b = $b / $den; my $a = $mean_y - ($b * $mean_x); return ($a, $b); } sub mean { my ($arr) = @_; my $tot = 0.0; foreach my $v (@{$arr}) { $tot += $v; } return $tot / ($#{$arr} + 1); } sub weighted_mean { my ($arr, $wgt) = @_; my $tot_wgt = 0; my $tot_val = 0; foreach my $i (0..$#{$arr}) { # print "wm: $i: $wgt->[$i] $arr->[$i]\n"; $tot_wgt += $wgt->[$i]; $tot_val += ($wgt->[$i] * $arr->[$i]); } return $tot_val / $tot_wgt; } sub variance { my ($arr) = @_; my $mean = mean($arr); my $tot = 0.0; foreach my $v (@{$arr}) { $tot += (($v - $mean)**2); } return $tot / (($#{$arr})+1); } sub weighted_variance { my ($arr, $wgt) = @_; my $tot_wgt = 0.0; my $tot_val = 0.0; my $mean = weighted_mean($arr, $wgt); foreach my $i (0..$#{$arr}) { $tot_wgt += $wgt->[$i]; $tot_val += ($wgt->[$i] * (($arr->[$i] - $mean)**2)); } return $tot_val / $tot_wgt; } sub std_dev { my ($arr) = @_; return sqrt(variance($arr)); } sub weighted_std_dev { my ($arr, $wgt) = @_; return sqrt(weighted_variance($arr, $wgt)); } sub zscores { my ($arr) = @_; my @z; my ($mean, $std_dev) = (mean($arr), std_dev($arr)); foreach my $i (0..$#{$arr}) { $z[$i] = (($arr->[$i] - $mean) / $std_dev); } return \@z; } sub weighted_zscores { my ($arr, $wgt) = @_; my @z; my ($mean, $std_dev) = (weighted_mean($arr, $wgt), weighted_std_dev($arr, $wgt)); # if ($std_dev == 0) { # print stderr "WARNING: weighted standard deviatiation is 0.\n"; # $std_dev = 0.000000001; # } foreach my $i (0..$#{$arr}) { $z[$i] = (($arr->[$i] - $mean) / $std_dev); } return \@z; } sub sum { my ($a) = @_; my $tot = 0.0; foreach my $v (@{$a}) { $tot += $v; } return $tot; } sub pearson_int { my ($a1, $a2, @confs) = @_; die("Differing number of points in arrays.") if ($#{$a1} != $#{$a2}); my ($r, $N) = (pearson($a1, $a2), $#{$a1}+1); push @confs, 0.95 if (! (defined @confs)); my @toret = ($r, corr_signif($r, $N)); foreach my $c (@confs) { push @toret, r_conf_interval($r, $N); } return @toret; } sub weighted_pearson_int { my ($a1, $a2, $wgts, @confs) = @_; die("Differing number of points in arrays.") if ($#{$a1} != $#{$a2}); my ($r, $N) = (weighted_pearson($a1, $a2, $wgts), $#{$a1}+1); push @confs, 0.95 if (! (defined @confs)); my @toret = ($r, corr_signif($r, $N)); foreach my $c (@confs) { push @toret, r_conf_interval($r, $N); } return @toret; } sub spearman_int { my ($a1, $a2, @confs) = @_; die("Differing number of points in arrays.") if ($#{$a1} != $#{$a2}); my ($r, $N) = (spearman($a1, $a2), $#{$a1}+1); push @confs, 0.95 if (! (defined @confs)); my @toret = ($r, corr_signif($r, $N)); foreach my $c (@confs) { push @toret, r_conf_interval($r, $N); } return @toret; } sub pearson { my ($a1, $a2) = @_; my ($z1, $z2) = (zscores($a1), zscores($a2)); my $N = $#{$a1} + 1; my $r = 0.0; foreach my $i (0..$#{$a1}) { $r += ($z1->[$i] * $z2->[$i]); } return $r / $N; } sub weighted_pearson { my ($a1, $a2, $wgt) = @_; my ($z1, $z2) = (weighted_zscores($a1, $wgt), weighted_zscores($a2, $wgt)); my $N = sum($wgt); # print " z1: " . join(" ", @{$z1}) . "\n"; # print " z2: " . join(" ", @{$z2}) . "\n"; # print " a1: " . join(" ", @{$a1}) . "\n"; # print " a2: " . join(" ", @{$a2}) . "\n"; # print " wt: " . join(" ", @{$wgt}) . "\n"; # print " mean = " . mean($a1) . " " . mean($a2) . "\n"; # print " sum = $N wmean = " . weighted_mean($a1, $wgt) . " " . weighted_mean($a2, $wgt) . " wsd = " . weighted_std_dev($a1, $wgt) . " " . weighted_std_dev($a2, $wgt) . "\n"; my $r = 0.0; foreach my $i (0..$#{$z1}) { $r += ($wgt->[$i] * $z1->[$i] * $z2->[$i]); } # print "Correlation: " . $r/$N . "\n"; return $r / $N; } sub spearman { my ($a1, $a2) = @_; my ($n1, $n2) = (rank_convert($a1), rank_convert($a2)); return pearson($n1, $n2); } sub tao { my ($a1, $a2) = @_; my ($n1, $n2) = (rank_convert($a1), rank_convert($a2)); my $P = 0; foreach my $i (0..$#{$n1}) { foreach my $j (0..$#{$n1}) { $P++ if (($n1->[$j] > $n1->[$i]) && ($n2->[$j] > $n2->[$i])); } } my $n = ($#{$n1} + 1); return 0 if ($n == 0); my $tao = ((4*$P) / ($n * ($n - 1.0))) - 1.0; return $tao; } sub weighted_spearman { my ($a1, $a2, $wgt) = @_; my ($n1, $n2) = (rank_convert($a1), rank_convert($a2)); return weighted_pearson($n1, $n2, $wgt); } sub rank_convert { my ($arr) = @_; my @sarr = sort {$a <=> $b} @{$arr}; my %h; my $prev = -1; my $prev_val = "s-1"; foreach my $s (0..$#sarr) { my $cur = sprintf("s%.4f", $sarr[$s]); if ($prev == -1) { $prev = $s; $prev_val = $cur; } elsif ($cur ne $prev_val) { my $rank = 1+(($prev + ($s-1)) / 2); # print "storing $prev_val with rank = $rank.\n"; $h{$prev_val} = $rank; $prev = $s; $prev_val = $cur; } } my $rank = 1+(($prev + ($#sarr)) / 2); # print "storing $prev_val with rank = $rank.\n"; $h{$prev_val} = $rank; my @newarr; foreach my $s (0..$#{$arr}) { my $val = sprintf("s%.4f", $arr->[$s]); # print "looking up $val\n"; if (exists($h{$val})) { $newarr[$s] = $h{$val}; # print "pos $s ($val) is $h{$val}\n"; } else { print "Uh-oh! Can't find $val (for pos $s) in hash for rank convert!\n"; exit(0); } } return \@newarr; } my @os_ttable_rows = qw( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 40 50 60 80 100 120 inf); my @os_ttable_cols = qw(0.75 0.80 0.85 0.90 0.95 0.975 0.99 0.995 0.9975 0.999 0.9995); my @os_ttable = qw( 1.000 1.376 1.963 3.078 6.314 12.71 31.82 63.66 127.3 318.3 636.6 0.816 1.061 1.386 1.886 2.920 4.303 6.965 9.925 14.09 22.33 31.60 0.765 0.978 1.250 1.638 2.353 3.182 4.541 5.841 7.453 10.21 12.92 0.741 0.941 1.190 1.533 2.132 2.776 3.747 4.604 5.598 7.173 8.610 0.727 0.920 1.156 1.476 2.015 2.571 3.365 4.032 4.773 5.893 6.869 0.718 0.906 1.134 1.440 1.943 2.447 3.143 3.707 4.317 5.208 5.959 0.711 0.896 1.119 1.415 1.895 2.365 2.998 3.499 4.029 4.785 5.408 0.706 0.889 1.108 1.397 1.860 2.306 2.896 3.355 3.833 4.501 5.041 0.703 0.883 1.100 1.383 1.833 2.262 2.821 3.250 3.690 4.297 4.781 0.700 0.879 1.093 1.372 1.812 2.228 2.764 3.169 3.581 4.144 4.587 0.697 0.876 1.088 1.363 1.796 2.201 2.718 3.106 3.497 4.025 4.437 0.695 0.873 1.083 1.356 1.782 2.179 2.681 3.055 3.428 3.930 4.318 0.694 0.870 1.079 1.350 1.771 2.160 2.650 3.012 3.372 3.852 4.221 0.692 0.868 1.076 1.345 1.761 2.145 2.624 2.977 3.326 3.787 4.140 0.691 0.866 1.074 1.341 1.753 2.131 2.602 2.947 3.286 3.733 4.073 0.690 0.865 1.071 1.337 1.746 2.120 2.583 2.921 3.252 3.686 4.015 0.689 0.863 1.069 1.333 1.740 2.110 2.567 2.898 3.222 3.646 3.965 0.688 0.862 1.067 1.330 1.734 2.101 2.552 2.878 3.197 3.610 3.922 0.688 0.861 1.066 1.328 1.729 2.093 2.539 2.861 3.174 3.579 3.883 0.687 0.860 1.064 1.325 1.725 2.086 2.528 2.845 3.153 3.552 3.850 0.686 0.859 1.063 1.323 1.721 2.080 2.518 2.831 3.135 3.527 3.819 0.686 0.858 1.061 1.321 1.717 2.074 2.508 2.819 3.119 3.505 3.792 0.685 0.858 1.060 1.319 1.714 2.069 2.500 2.807 3.104 3.485 3.767 0.685 0.857 1.059 1.318 1.711 2.064 2.492 2.797 3.091 3.467 3.745 0.684 0.856 1.058 1.316 1.708 2.060 2.485 2.787 3.078 3.450 3.725 0.684 0.856 1.058 1.315 1.706 2.056 2.479 2.779 3.067 3.435 3.707 0.684 0.855 1.057 1.314 1.703 2.052 2.473 2.771 3.057 3.421 3.690 0.683 0.855 1.056 1.313 1.701 2.048 2.467 2.763 3.047 3.408 3.674 0.683 0.854 1.055 1.311 1.699 2.045 2.462 2.756 3.038 3.396 3.659 0.683 0.854 1.055 1.310 1.697 2.042 2.457 2.750 3.030 3.385 3.646 0.681 0.851 1.050 1.303 1.684 2.021 2.423 2.704 2.971 3.307 3.551 0.679 0.849 1.047 1.299 1.676 2.009 2.403 2.678 2.937 3.261 3.496 0.679 0.848 1.045 1.296 1.671 2.000 2.390 2.660 2.915 3.232 3.460 0.678 0.846 1.043 1.292 1.664 1.990 2.374 2.639 2.887 3.195 3.416 0.677 0.845 1.042 1.290 1.660 1.984 2.364 2.626 2.871 3.174 3.390 0.677 0.845 1.041 1.289 1.658 1.980 2.358 2.617 2.860 3.160 3.373 0.674 0.842 1.036 1.282 1.645 1.960 2.326 2.576 2.807 3.090 3.291 ); my @ts_ttable_rows = qw( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 40 50 60 100 1000 inf ); my @ts_ttable_cols = qw(0.5 0.6 0.7 0.8 0.9 0.95 0.96 0.98 0.99 0.995 0.998 0.999); my @ts_ttable = qw( 1.00 1.38 3.08 3.08 6.31 12.71 31.82 63.66 127.3 318.3 636.62 0.82 1.89 1.89 1.89 2.92 4.30 6.96 9.92 14.09 22.33 31.60 0.765 0.978 1.250 1.638 2.353 3.182 3.482 4.541 5.841 7.453 10.214 12.924 0.741 0.941 1.190 1.533 2.132 2.776 2.999 3.747 4.604 5.598 7.173 8.610 0.727 0.920 1.156 1.476 2.015 2.571 2.757 3.365 4.032 4.773 5.893 6.869 0.718 0.906 1.134 1.440 1.943 2.447 2.612 3.143 3.707 4.317 5.208 5.959 0.711 0.896 1.119 1.415 1.895 2.365 2.517 2.998 3.499 4.029 4.785 5.408 0.706 0.889 1.108 1.397 1.860 2.306 2.449 2.896 3.355 3.833 4.501 5.041 0.703 0.883 1.100 1.383 1.833 2.262 2.398 2.821 3.250 3.690 4.297 4.781 0.700 0.879 1.093 1.372 1.812 2.228 2.359 2.764 3.169 3.581 4.144 4.587 0.697 0.876 1.088 1.363 1.796 2.201 2.328 2.718 3.106 3.497 4.025 4.437 0.695 0.873 1.083 1.356 1.782 2.179 2.303 2.681 3.055 3.428 3.930 4.318 0.694 0.870 1.079 1.350 1.771 2.160 2.282 2.650 3.012 3.372 3.852 4.221 0.692 0.868 1.076 1.345 1.761 2.145 2.264 2.624 2.977 3.326 3.787 4.140 0.691 0.866 1.074 1.341 1.753 2.131 2.249 2.602 2.947 3.286 3.733 4.073 0.690 0.865 1.071 1.337 1.746 2.120 2.235 2.583 2.921 3.252 3.686 4.015 0.689 0.863 1.069 1.333 1.740 2.110 2.224 2.567 2.898 3.222 3.646 3.965 0.688 0.862 1.067 1.330 1.734 2.101 2.214 2.552 2.878 3.197 3.611 3.922 0.688 0.861 1.066 1.328 1.729 2.093 2.205 2.539 2.861 3.174 3.579 3.883 0.687 0.860 1.064 1.325 1.725 2.086 2.197 2.528 2.845 3.153 3.552 3.850 0.686 0.859 1.063 1.323 1.721 2.080 2.189 2.518 2.831 3.135 3.527 3.819 0.686 0.858 1.061 1.321 1.717 2.074 2.183 2.508 2.819 3.119 3.505 3.792 0.685 0.858 1.060 1.319 1.714 2.069 2.177 2.500 2.807 3.104 3.485 3.768 0.685 0.857 1.059 1.318 1.711 2.064 2.172 2.492 2.797 3.091 3.467 3.745 0.684 0.856 1.058 1.316 1.708 2.060 2.167 2.485 2.787 3.078 3.450 3.725 0.684 0.856 1.058 1.315 1.706 2.056 2.162 2.479 2.779 3.067 3.435 3.707 0.684 0.855 1.057 1.314 1.703 2.052 2.150 2.473 2.771 3.057 3.421 3.690 0.683 0.855 1.056 1.313 1.701 2.048 2.154 2.467 2.763 3.047 3.408 3.674 0.683 0.854 1.055 1.311 1.699 2.045 2.150 2.462 2.756 3.038 3.396 3.659 0.683 0.854 1.055 1.310 1.697 2.042 2.147 2.457 2.750 3.030 3.385 3.646 0.681 0.851 1.050 1.303 1.684 2.021 2.123 2.423 2.704 2.971 3.307 3.551 0.679 0.849 1.047 1.295 1.676 2.009 2.109 2.403 2.678 2.937 3.261 3.496 0.679 0.848 1.045 1.296 1.671 2.000 2.099 2.390 2.660 2.915 3.232 3.460 0.678 0.846 1.043 1.292 1.664 1.990 2.088 2.374 2.639 2.887 3.195 3.416 0.677 0.845 1.042 1.290 1.660 1.984 2.081 2.364 2.626 2.871 3.174 3.390 0.675 0.842 1.037 1.282 1.646 1.962 2.056 2.330 2.581 2.813 3.098 3.300 0.674 0.841 1.036 1.282 1.640 1.960 2.054 2.326 2.576 2.807 3.091 3.291); my @ztable = qw( 0.0000 0.0040 0.0080 0.0120 0.0160 0.0199 0.0239 0.0279 0.0319 0.0359 0.0398 0.0438 0.0478 0.0517 0.0557 0.0596 0.0636 0.0675 0.0714 0.0753 0.0793 0.0832 0.0871 0.0910 0.0948 0.0987 0.1026 0.1064 0.1103 0.1141 0.1179 0.1217 0.1255 0.1293 0.1331 0.1368 0.1406 0.1443 0.1480 0.1517 0.1554 0.1591 0.1628 0.1664 0.1700 0.1736 0.1772 0.1808 0.1844 0.1879 0.1915 0.1950 0.1985 0.2019 0.2054 0.2088 0.2123 0.2157 0.2190 0.2224 0.2257 0.2291 0.2324 0.2357 0.2389 0.2422 0.2454 0.2486 0.2517 0.2549 0.2580 0.2611 0.2642 0.2673 0.2704 0.2734 0.2764 0.2794 0.2823 0.2852 0.2881 0.2910 0.2939 0.2967 0.2995 0.3023 0.3051 0.3078 0.3106 0.3133 0.3159 0.3186 0.3212 0.3238 0.3264 0.3289 0.3315 0.3340 0.3365 0.3389 0.3413 0.3438 0.3461 0.3485 0.3508 0.3531 0.3554 0.3577 0.3599 0.3621 0.3643 0.3665 0.3686 0.3708 0.3729 0.3749 0.3770 0.3790 0.3810 0.3830 0.3849 0.3869 0.3888 0.3907 0.3925 0.3944 0.3962 0.3980 0.3997 0.4015 0.4032 0.4049 0.4066 0.4082 0.4099 0.4115 0.4131 0.4147 0.4162 0.4177 0.4192 0.4207 0.4222 0.4236 0.4251 0.4265 0.4279 0.4292 0.4306 0.4319 0.4332 0.4345 0.4357 0.4370 0.4382 0.4394 0.4406 0.4418 0.4429 0.4441 0.4452 0.4463 0.4474 0.4484 0.4495 0.4505 0.4515 0.4525 0.4535 0.4545 0.4554 0.4564 0.4573 0.4582 0.4591 0.4599 0.4608 0.4616 0.4625 0.4633 0.4641 0.4649 0.4656 0.4664 0.4671 0.4678 0.4686 0.4693 0.4699 0.4706 0.4713 0.4719 0.4726 0.4732 0.4738 0.4744 0.4750 0.4756 0.4761 0.4767 0.4772 0.4778 0.4783 0.4788 0.4793 0.4798 0.4803 0.4808 0.4812 0.4817 0.4821 0.4826 0.4830 0.4834 0.4838 0.4842 0.4846 0.4850 0.4854 0.4857 0.4861 0.4864 0.4868 0.4871 0.4875 0.4878 0.4881 0.4884 0.4887 0.4890 0.4893 0.4896 0.4898 0.4901 0.4904 0.4906 0.4909 0.4911 0.4913 0.4916 0.4918 0.4920 0.4922 0.4925 0.4927 0.4929 0.4931 0.4932 0.4934 0.4936 0.4938 0.4940 0.4941 0.4943 0.4945 0.4946 0.4948 0.4949 0.4951 0.4952 0.4953 0.4955 0.4956 0.4957 0.4959 0.4960 0.4961 0.4962 0.4963 0.4964 0.4965 0.4966 0.4967 0.4968 0.4969 0.4970 0.4971 0.4972 0.4973 0.4974 0.4974 0.4975 0.4976 0.4977 0.4977 0.4978 0.4979 0.4979 0.4980 0.4981 0.4981 0.4982 0.4982 0.4983 0.4984 0.4984 0.4985 0.4985 0.4986 0.4986 0.4987 0.4987 0.4987 0.4988 0.4988 0.4989 0.4989 0.4989 0.4990 0.4990 0.4990 0.4991 0.4991 0.4991 0.4992 0.4992 0.4992 0.4992 0.4993 0.4993 0.4993 0.4993 0.4994 0.4994 0.4994 0.4994 0.4994 0.4995 0.4995 0.4995 0.4995 0.4995 0.4995 0.4996 0.4996 0.4996 0.4996 0.4996 0.4996 0.4997 0.4997 0.4997 0.4997 0.4997 0.4997 0.4997 0.4997 0.4997 0.4997 0.4998); sub os_ttable_lookup { my ($df, $p) = @_; my $dfi = 0; while (($dfi < $#os_ttable_rows) && ($os_ttable_rows[$dfi] ne "inf") && ($os_ttable_rows[$dfi] < $df) && (($os_ttable_rows[$dfi+1] eq "inf") || ($os_ttable_rows[$dfi+1] <= $df))) { $dfi++; } my $pi = 0; while (($pi < $#os_ttable_cols) && ($os_ttable_cols[$pi] < $p)) { $pi++; } my $ind = ((($#os_ttable_cols)+1) * $dfi) + $pi; return $os_ttable[$ind]; } sub ts_ttable_lookup { my ($df, $p) = @_; my $dfi = 0; while (($dfi < $#ts_ttable_rows) && ($ts_ttable_rows[$dfi] ne "inf") && ($ts_ttable_rows[$dfi] < $df) && (($ts_ttable_rows[$dfi+1] eq "inf") || ($ts_ttable_rows[$dfi+1] <= $df))) { $dfi++; } my $pi = 0; while (($pi < $#ts_ttable_cols) && ($ts_ttable_cols[$pi] < $p)) { $pi++; } my $ind = ((($#ts_ttable_cols)+1) * $dfi) + $pi; return $ts_ttable[$ind]; } sub lookup_z_for_int { my $conf = shift; die("Invalid confidence interval $conf for z lookup.") if (($conf < 0.0) || ($conf >= 1.0)); $conf *= 0.5; my $z = 0; while (($ztable[$z] < $conf) && ($z < $#ztable)) { $z++; } return ($z / 100.0); } sub lookup_z { my $z = shift; my $ind = sprintf("%d", $z*100); return $ztable[$ind]; } 1;