@techreport{TD:100184,
	att_abstract={{This paper describes the techniques developed to enhance the search coverage of domain-specific Language Models (LMs) by incorporating domain-independent language knowledge from the Google 1-trillion-word n-gram corpus created from general World Wide Web (WWW) documents. The purpose of our study is to explore a Natural Language (NL) based multimodal search interface for TV-oriented online Electronic Programming Guides (EPG) that supports both typed and spoken queries. The proposed method uses a two-pass procedure that combines the domain knowledge derived from an EPG-specific training corpus with the Google n-gram corpus, treating the latter as a domain-independent phrase dictionary with built-in usage counts from web page authors. The enhanced LMs achieve an absolute improvement of 23% in model coverage (recall accuracy) without reducing the precision accuracy measured by Word Error Rate (WER) on a test set of in-domain spoken query utterances.}},
	att_authors={hc4395},
	att_categories={C_IIS.11, C_IIS.2, C_IIS.10, C_IIS.4},
	att_copyright={{Springer}},
	att_copyright_notice={{The definitive version was published in International Conference on Artificial Intelligence and Soft Computing, Volume 7268/2012, 2012-04-29, DOI 10.1007/978-3-642-29350-4_5.}},
	att_donotupload={true},
	att_private={false},
	att_projects={SpokenDialog},
	att_tags={Natural language modeling, linguistic properties of entertainment language, electronic programming guide, metadata harvesting},
	att_techdoc={true},
	att_techdoc_key={TD:100184},
	att_url={},
	author={Hisao Chang},
	institution={{Artificial Intelligence and Soft Computing, Lecture Notes in Computer Science}},
	month={April},
	title={{Enriching Domain-based Language Models Using Domain-independent WWW N-gram Corpus}},
	year=2012,
}