@inproceedings{13714,
  author    = {Snover, Matthew and Dorr, Bonnie J. and Schwartz, Richard and Micciulla, Linnea and Makhoul, John},
  title     = {A Study of Translation Edit Rate with Targeted Human Annotation},
  booktitle = {Proceedings of the Association for Machine Translation in the Americas},
  year      = {2006},
  pages     = {223--231},
  abstract  = {We examine a new, intuitive measure for evaluating machine-translation output that avoids the knowledge intensiveness of more meaning-based approaches, and the labor-intensiveness of human judgments. Translation Edit Rate (TER) measures the amount of editing that a human would have to perform to change a system output so it exactly matches a reference translation. We show that the single-reference variant of TER correlates as well with human judgments of MT quality as the four-reference variant of BLEU. We also define a human-targeted TER (or HTER) and show that it yields higher correlations with human judgments than BLEU{\textemdash}even when BLEU is given human-targeted references. Our results indicate that HTER correlates with human judgments better than HMETEOR and that the four-reference variants of TER and HTER correlate with human judgments as well as{\textemdash}or better than{\textemdash}a second human judgment does.},
}