#!/usr/bin/perl

# Script to build nugget pyramids out of individual vital/okay
# judgments.  Please refer to the following citation:
#
# Jimmy Lin and Dina Demner-Fushman.  Will Pyramids Built of Nuggets
# Topple Over?  Proceedings of the 2006 Human Language Technology
# Conference and the North American Chapter of the Association for
# Computational Linguistics Annual Meeting (HLT/NAACL 2006), June
# 2006, New York City, New York.
#
# Input:  the judgment files listed in @judgments, read relative to the
#         current directory.  A judgment line has the whitespace-separated
#         form
#             <question id> <nugget id> <vital|okay> <nugget text>
# Output: the first judgment file is echoed to STDOUT with the vital/okay
#         column of every nugget line replaced by a score: the number of
#         judgment files that mark the nugget "vital", divided by the
#         largest such count among the nuggets of the same question.
#         Lines that are not nugget lines are printed unchanged.

use strict;
use warnings;

use List::Util qw(max);

my @judgments = (
    "trec2003.nuggets.0",
    "trec2003.nuggets.1",
    "trec2003.nuggets.2",
    "trec2003.nuggets.3",
    "trec2003.nuggets.4",
    "trec2003.nuggets.5",
    "trec2003.nuggets.6",
    "trec2003.nuggets.7",
    "trec2003.nuggets.8",
    "trec2003.nuggets.9",
);

# A judgment: question id, nugget id, vital/okay label.
my $JUDGMENT_RE = qr/([\d.]+)\s+(\d+)\s+(vital|okay)/;

# A full nugget line: a judgment followed by the nugget text.
my $NUGGET_RE = qr/([\d.]+)\s+(\d+)\s+(vital|okay)\s+([^\n]+)/;

# Read one judgment file and return a hash ref mapping "qid#nid" to the
# label ('vital' or 'okay') found in that file.  Dies if the file cannot
# be opened: a silently missing assessor would skew every score.
sub load_judgments {
    my ($path) = @_;

    open my $fh, '<', $path or die "Cannot open $path: $!\n";

    my %label_of;
    while ( my $line = <$fh> ) {
        if ( $line =~ $JUDGMENT_RE ) {
            $label_of{"$1#$2"} = $3;
        }
    }
    close $fh;

    return \%label_of;
}

# Build the output lines of the pyramid.
#
#   $template_lines - array ref: lines of the file whose layout is echoed
#   $raw            - hash ref: file name => { "qid#nid" => label }
#   $files          - array ref: file names (keys of $raw) to count over
#
# Returns the list of output lines, each still carrying its newline.
sub pyramid_lines {
    my ( $template_lines, $raw, $files ) = @_;

    # First pass: for each nugget, count the files that judged it vital.
    my %vital_cnt;
    for my $line ( @{$template_lines} ) {
        next unless $line =~ $NUGGET_RE;
        my ( $qid, $nid ) = ( $1, $2 );
        my $key = "$qid#$nid";

        $vital_cnt{$qid}{$nid} = scalar grep {
            my $jud = $raw->{$_}{$key};
            defined $jud && $jud eq 'vital'
        } @{$files};
    }

    # The normalizer is per question, so compute it once per question
    # rather than once per output line.
    my %max_for;
    for my $qid ( keys %vital_cnt ) {
        $max_for{$qid} = max( values %{ $vital_cnt{$qid} } );
    }

    # Second pass: normalize and format.
    my @out;
    for my $line ( @{$template_lines} ) {
        if ( $line =~ $NUGGET_RE ) {
            my ( $qid, $nid, $nugget ) = ( $1, $2, $4 );
            my $max = $max_for{$qid};

            # If nobody marked any nugget of this question vital, the
            # maximum is 0; score 0 instead of dividing by zero.
            my $score = $max ? $vital_cnt{$qid}{$nid} / $max : 0;

            push @out, "$qid\t$nid\t$score\t$nugget\n";
        }
        else {
            push @out, $line;
        }
    }

    return @out;
}

# Load every judgment file, then print the pyramid laid out like the
# first judgment file.
sub main {
    my %raw = map { $_ => load_judgments($_) } @judgments;

    my $template = $judgments[0];
    open my $fh, '<', $template or die "Cannot open $template: $!\n";
    my @template_lines = <$fh>;
    close $fh;

    print pyramid_lines( \@template_lines, \%raw, \@judgments );
    return;
}

# Run only when executed as a script, so the subs can be loaded on
# their own.
main() unless caller;

1;