% TD:100269 -- "Automatic Discovery of Attributes in Relational Databases".
% Typed as a conference paper: the att_copyright_notice field below states that
% the definitive version was published in ACM SIGMOD 2011, so the venue lives in
% booktitle. The att_* fields are non-standard bookkeeping fields carried over
% unchanged; unknown fields are ignored by bibliography styles.
@inproceedings{TD:100269,
	author={Zhang, Meihui and Ooi, Beng Chin and Srivastava, Divesh and Procopiuc, Cecilia and Hadjieleftheriou, Marios},
	title={Automatic Discovery of Attributes in Relational Databases},
	booktitle={{ACM} {SIGMOD} 2011},
	month=jun,
	year=2011,
	att_abstract={{In this work we try to cluster relational columns into attributes, i.e., to identify strong relationships between columns
based on the common properties and characteristics of the values they contain.
For example, identifying whether a certain set of columns refers
to telephone numbers versus social security numbers, or names of customers versus names of employees.
Traditional relational database schema languages use very limited primitive data types and
simple foreign key constraints to express relationships between columns. Object oriented schema
languages allow the definition of custom data types, but still, certain relationships between
columns might be unknown at design time or they might appear only in a particular database instance.
Nevertheless, these relationships are an invaluable tool for schema matching, and generally for
better understanding and working with the data. Here, we introduce data oriented solutions (we do not consider solutions that
assume the existence of any external knowledge), that use statistical measures to identify strong relationships
between the values of a set of columns. Interpreting the
database as a graph where nodes correspond to database columns and edges correspond to column
relationships, we decompose the graph into connected components and cluster sets of columns into
attributes. To test the quality of our solutions, we also provide a
comprehensive experimental evaluation using real and synthetic datasets.
}},
	att_authors={ds8961, cp2838, mh6516},
	att_categories={C_IIS.5, C_CCF.1, C_IIS.1},
	att_copyright={{ACM}},
	att_copyright_notice={{(c) ACM, 2011. This is the author's version of the work. It is posted here by permission of ACM for your personal use. Not for redistribution. The definitive version was published in ACM SIGMOD 2011 {{, 2011-06-12}}{{, http://www.acm.org/}}. }},
	att_donotupload={},
	att_private={false},
	att_projects={},
	att_tags={Schema matching,  Data integration},
	att_techdoc={true},
	att_techdoc_key={TD:100269},
	att_url={http://web1.research.att.com:81/techdocs_downloads/TD:100269_DS1_2011-06-20T13:40:02.671Z.pdf},
}