% Record TD:101046: Bocchieri and Dimitriadis, paper presented at ICASSP (May 2013).
% The att_* fields are non-standard; standard bibliography styles ignore unknown fields.
% NOTE(review): the venue stored in "institution" is a conference, so this record may
%   be better expressed as an inproceedings entry with "booktitle"; the techreport type
%   is kept here because the record is flagged att_techdoc=true -- confirm with the
%   exporter's conventions before changing it.
% NOTE(review): att_copyright_notice says "published in 2012" while year is 2013, and
%   contains leftover template braces; left untouched -- verify against the source record.
@techreport{TD:101046,
	att_abstract={{Although micro-modulation components such as the formant frequencies are very important characteristics of spoken speech, and great performance improvements have been detailed in small-vocabulary ASR tasks, yet they have limited use in large vocabulary ASR applications.  To successfully use these frequency measures in real-life tasks, we study linear, e.g., HDA, and non-linear (bottleneck MLP) feature transforms for combining features such as the MFCC’s and PLP’s, with the formant frequency-related coefficients. Our experiments show that the integration, using non-linear MLP-based transforms, of micro-modulation and cepstral features greatly improves the ASR with respect to the cepstral features alone. We have applied this novel feature extraction scheme onto two very different tasks, i.e. a clean speech task (DARPA-WSJ) and a real-life, open-vocabulary, mobile search task (Speak4itSM), always reporting improved performance. We report relative error rate reduction of 15% for the Speak4itSM task, and similar improvements, up to 21%, for the WSJ task.}},
	att_authors={eb3134, dd734j},
	att_categories={},
	att_copyright={{IEEE}},
	att_copyright_notice={{This version of the work is reprinted here with permission of IEEE for your personal use. Not for redistribution. The definitive version was published in 2012 {{, 2013-05-26}}{{, http://www.icassp2013.com/}}
}},
	att_donotupload={},
	att_private={false},
	att_projects={},
	att_tags={Neural networks, feature extraction, robustness, speech recognition},
	att_techdoc={true},
	att_techdoc_key={TD:101046},
	att_url={http://web1.research.att.com:81/techdocs_downloads/TD:101046_DS1_2012-11-23T18:48:41.104Z.pdf},
	author={Enrico Bocchieri and Dimitrios Dimitriadis},
	institution={{38th International Conference on Acoustics, Speech, and Signal Processing (ICASSP), IEEE}},
	month=may,
	title={Investigating Deep Neural Network Based Transforms of Robust Audio Features for {LVCSR}},
	year=2013,
}