% Conference paper (GECCO '11 companion). Fixes vs. original export:
% @conference -> @inproceedings (canonical type), dropped garbled
% month={2011///}, pages hyphen -> double hyphen, spaces after commas
% in author names, one field per line.
@inproceedings{16417,
  title     = {When does simulated data match real data?},
  author    = {Stonedahl, Forrest and Anderson, David and Rand, William},
  booktitle = {Proceedings of the 13th annual conference companion on Genetic and evolutionary computation},
  series    = {GECCO {\textquoteright}11},
  year      = {2011},
  pages     = {231--232},
  publisher = {ACM},
  organization = {ACM},
  address   = {New York, NY, USA},
  abstract  = {Agent-based models can replicate real-world patterns, but finding parameters that achieve the best match can be difficult. To validate a model, a real-world dataset is often divided into a training set (to calibrate the parameters) and a test set (to validate the calibrated model). The difference between the training and test data and the simulated data is determined using an error measure. In the context of evolutionary computation techniques, the error measure also serves as a fitness function, and thus affects evolutionary search dynamics. We survey the effect of five different error measures on both a toy problem and a real world problem of matching a model to empirical online news consumption behavior. We use each error measure separately for calibration on the training dataset, and then examine the results of all five error measures on both the training and testing datasets. We show that certain error measures sometimes serve as better fitness functions than others, and in fact using one error measure may result in better calibration (on a different measure) than using the different measure directly. For the toy problem, the Pearson{\textquoteright}s correlation measure dominated all other measures, but no single error measure was Pareto dominant for the real world problem.},
  keywords  = {Agent-based modeling, business, Calibration, Genetic algorithms, information search, network analysis},
  isbn      = {978-1-4503-0690-4},
  doi       = {10.1145/2001858.2001988},
  url       = {http://doi.acm.org/10.1145/2001858.2001988},
}