% Record TD:101199 -- "On Repairing Structural Problems in Semi-structured Data"
% (Korn, Saha, Srivastava, Ying; August 2013).
%
% Standard fields come first; the att_* fields that follow are non-standard
% and are ignored by the standard BibTeX styles. Their values are kept
% exactly as exported.
%
% NOTE(review): institution holds "VLDB 2013", which looks like a conference
% venue rather than an issuing institution -- TODO confirm whether this record
% should carry the real institution (or be an inproceedings entry instead).
% NOTE(review): att_copyright_notice ends with a stray ", 2013-08-26" brace
% group and a line break; it looks like export residue -- verify upstream.
@techreport{TD:101199,
	author               = {Korn, Philip and Saha, Barna and Srivastava, Divesh and Ying, Shanshan},
	title                = {On Repairing Structural Problems in Semi-structured Data},
	institution          = {{VLDB 2013}},
	year                 = 2013,
	month                = aug,
	att_abstract         = {{Semi-structured data such as XML are popular for data interchange and storage. However, many XML documents have improper nesting where open- and close-tags are unmatched. Since some semi-structured data (e.g., Latex) have a flexible grammar and since many XML documents lack an accompanying DTD or XSD, we focus on computing a syntactic repair via the edit distance.

To solve this problem, we propose a dynamic programming algorithm which takes cubic time. While this algorithm is not scalable, well-formed substrings of the data can be pruned to enable faster computation. Unfortunately, there are still cases where the dynamic program could be very expensive; hence, we give branch-and-bound algorithms based on various combinations of two heuristics, called MinCost and MaxBenefit, that trade off between accuracy and efficiency. Finally, we experimentally demonstrate the performance of these algorithms on real data.}},
	att_authors          = {pk1785, bs621s, ds8961},
	att_categories       = {C_NSS.2},
	att_copyright        = {{VLDB Foundation}},
	att_copyright_notice = {{The definitive version was published in Very Large Databases, 2013. {{, 2013-08-26}}
}},
	att_donotupload      = {},
	att_private          = {false},
	att_projects         = {},
	att_tags             = {},
	att_techdoc          = {true},
	att_techdoc_key      = {TD:101199},
	att_url              = {http://web1.research.att.com:81/techdocs_downloads/TD:101199_DS1_2013-05-30T16:18:59.419Z.pdf},
}