% Workshop paper by Bhattacharya and Getoor (2004) on link-based clustering
% for entity deduplication and group discovery. The venue is a KDD workshop,
% so it is recorded as a proceedings contribution with the venue in booktitle.
% No month field is given: the exported value was not a valid month.
@inproceedings{14414,
  author    = {Bhattacharya, I. and Getoor, Lise},
  title     = {Deduplication and Group Detection Using Links},
  booktitle = {{KDD} Workshop on Link Analysis and Group Detection},
  year      = {2004},
  abstract  = {Clustering is a fundamental problem in data mining. Traditionally, clustering is done based on the similarity of the attribute values of the entities to be clustered. More recently, there has been greater interest in clustering relational and structured data. Often times this data is best described as a graph, in which there are both entities, described by a collection of attributes, and links between entities, representing the relations between them. Clustering in these scenarios becomes more complex, as we should also take into account the similarity of the entity links when we are clustering. We propose novel distance measures for clustering linked data, and show how they can be used to solve two important data mining tasks, entity deduplication and group discovery.},
}