% Tech-doc record TD:101943 -- "Scalable Informative Rule Mining" (the SIRUM system).
% The att_* fields are non-standard bookkeeping fields; standard bibliography
% styles ignore unknown fields, so they do not affect the rendered reference.
% NOTE(review): the institution field carries the publication venue (an IEEE
% conference) rather than an issuing institution; this record may be better
% typed as a conference-paper entry with a booktitle field -- confirm against
% the exporting system's conventions before changing the entry type.
@techreport{TD:101943,
	att_abstract={{We present SIRUM: a system for Scalable Informative RUle Mining from multi-dimensional data. Informative rules have recently been studied in several contexts, including data summarization, data cube exploration and data quality. The objective is to produce a small set of rules (patterns) over the values of the dimension attributes that provide the most information about the distribution of a numeric measure attribute. Within SIRUM, we propose several optimizations for tall, wide and distributed datasets. We implemented SIRUM in Spark and observed significant performance and scalability improvements on real datasets due to our optimizations. As a result, SIRUM is able to generate informative rules on much wider and taller datasets than using distributed implementations of the previous state of the art.}},
	att_authors={ds8961},
	att_categories={C_BB.1, C_NSS.2, C_IIS.2},
	att_copyright={{IEEE}},
	att_copyright_notice={{This version of the work is reprinted here with permission of IEEE for your personal use. Not for redistribution. The definitive version was published in IEEE International Conference on Data Engineering, 2017-04-19.}},
	att_donotupload={},
	att_private={false},
	att_projects={},
	att_tags={},
	att_techdoc={true},
	att_techdoc_key={TD:101943},
	att_url={http://web1.research.att.com:81/techdocs_downloads/TD:101943_DS1_2017-03-07T06:02:50.085Z.pdf},
	author={Srivastava, Divesh and Feng, Guoyao and Golab, Lukasz},
	institution={{IEEE International Conference on Data Engineering}},
	month=apr,
	title={Scalable Informative Rule Mining},
	year=2017,
}