% Technical-report record TD:100813 -- Dasu and Loh, "Statistical Distortion:
% Consequences of Data Cleaning" (2012).
% The att_* fields are non-standard metadata; standard BibTeX styles ignore
% unknown fields, so only author/institution/month/title/year are typeset.
@techreport{TD:100813,
	att_abstract={{We introduce the notion of {\em statistical distortion}
as an essential metric for measuring the effectiveness of data cleaning
strategies.

We use this metric to propose a widely applicable yet scalable experimental framework
for evaluating data cleaning strategies along three dimensions:
glitch improvement, statistical distortion and cost-related criteria.
Existing metrics focus on glitch improvement and cost, but not on the
statistical impact of data cleaning strategies.
We illustrate our framework on real world data, with a comprehensive suite of
experiments and analyses.}},
	att_authors={td3863, jl213k},
	att_categories={},
	att_copyright={{VLDB Foundation}},
	att_copyright_notice={{The definitive version was published in Very Large Databases, 2012. {{, Volume PVLDB vol.5/VLDB 2012}}{{, 2012-08-30}}
}},
	att_donotupload={},
	att_private={false},
	att_projects={},
	att_tags={Data Quality,  },
	att_techdoc={true},
	att_techdoc_key={TD:100813},
	att_url={http://web1.research.att.com:81/techdocs_downloads/TD:100813_DS1_2012-09-05T15:28:37.085Z.pdf},
	author={Dasu, Tamraparni and Loh, Ji},
	institution={{VLDB Foundation}},
	month=aug,
	title={Statistical Distortion: Consequences of Data Cleaning},
	year=2012,
}