@article{13870,
  title    = {Task-based evaluation of text summarization using Relevance Prediction},
  journal  = {Information Processing \& Management},
  volume   = {43},
  year     = {2007},
  month    = {November},
  pages    = {1482--1499},
  abstract = {This article introduces a new task-based evaluation measure called Relevance Prediction that is a more intuitive measure of an individual's performance on a real-world task than interannotator agreement. Relevance Prediction parallels what a user does in the real-world task of browsing a set of documents using standard search tools, i.e., the user judges relevance based on a short summary and then that same user---not an independent user---decides whether to open (and judge) the corresponding document. This measure is shown to be a more reliable measure of task performance than LDC Agreement, a current gold-standard-based measure used in the summarization evaluation community. Our goal is to provide a stable framework within which developers of new automatic measures may make stronger statistical statements about the effectiveness of their measures in predicting summary usefulness. We demonstrate---as a proof-of-concept methodology for automatic metric developers---that a current automatic evaluation measure has a better correlation with Relevance Prediction than with LDC Agreement and that the significance level for detected differences is higher for the former than for the latter.},
  keywords = {Relevance prediction, Summarization evaluation, Summary usefulness},
  issn     = {0306-4573},
  doi      = {10.1016/j.ipm.2007.01.002},
  url      = {http://www.sciencedirect.com/science/article/pii/S0306457307000234},
  author   = {Hobson, Stacy President and Dorr, Bonnie J. and Monz, Christof and Schwartz, Richard}
}
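
The abstract describes Relevance Prediction as same-user agreement: a subject first judges relevance from the summary alone, then judges the full document, and the measure is how often the two judgments match. Below is a minimal sketch of that computation, assuming binary relevance judgments; the function names and data layout are illustrative and not drawn from the paper.

    # Illustrative sketch (not from the paper). Relevance Prediction, as
    # described in the abstract, scores how often a subject's summary-based
    # relevance judgment matches that SAME subject's full-document judgment.

    def relevance_prediction(summary_judgments, document_judgments):
        """Fraction of items where one subject's summary-based judgment
        agrees with the same subject's full-document judgment."""
        assert len(summary_judgments) == len(document_judgments)
        matches = sum(s == d for s, d in zip(summary_judgments, document_judgments))
        return matches / len(summary_judgments)

    def ldc_agreement(summary_judgments, gold_labels):
        """Gold-standard-based alternative mentioned in the abstract:
        agreement of summary-based judgments with independent LDC labels."""
        assert len(summary_judgments) == len(gold_labels)
        matches = sum(s == g for s, g in zip(summary_judgments, gold_labels))
        return matches / len(summary_judgments)

    # Example: one subject judged five summaries, then the five documents.
    summaries = [True, False, True, True, False]
    documents = [True, False, False, True, False]
    print(relevance_prediction(summaries, documents))  # 0.8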