errity_bib.bib

% PhD thesis (Dublin City University, 2010). Abstract paragraphs are separated by blank lines.
@PHDTHESIS{Errity2010,
  AUTHOR = {A. Errity},
  TITLE = {Exploring the dimensionality of speech using manifold learning and
	dimensionality reduction methods},
  SCHOOL = {Dublin City University},
  YEAR = {2010},
  ABSTRACT = {Many previous investigations have indicated that speech data has inherent
	low-dimensional structure and that it may be possible to efficiently
	represent speech using only a small number of parameters. This view
	is motivated by the fact that articulatory movement is limited by
	physiological constraints and thus the speech production apparatus
	has only limited degrees of freedom. Also, the set of sounds used
	in human spoken communication is only a small subset of all producible
	sounds. A number of dimensionality reduction methods capable of discovering
	such underlying structure have previously been applied to speech.
	However, if speech lies on a manifold nonlinearly embedded in high-dimensional
	space, as has been proposed in the past, classic linear dimensionality
	reduction methods would be unable to discover this embedding. In
	this dissertation a number of manifold learning, also referred to
	as nonlinear dimensionality reduction, methods are applied to speech
	to explore the possibility of underlying nonlinear manifold structure.
	
	
	This dissertation describes a number of existing manifold learning
	methods and details the application of these methods to high-dimensional
	feature representations of speech data. Representations derived from
	the conventional magnitude spectrum and less widely used phase spectrum
	are investigated. The manifold learning methods used in this study
	are locally linear embedding, Isomap, and Laplacian eigenmaps. The
	classic linear method, principal component analysis (PCA), is also
	applied to facilitate the comparison of linear and nonlinear methods.
	The resulting low-dimensional representations are analysed through
	visualisation, phone recognition, and speaker recognition experiments.
	The recognition experiments are used as a means of evaluating how
	much meaningful discriminatory information is contained in the low-dimensional
	representations produced by each method. These experiments also serve
	to display the potential value of these methods in speech processing
	applications.
	
	
	The manifold learning methods are shown to be capable of producing
	meaningful low-dimensional representations of speech data suggesting
	speech has low-dimensional manifold structure. In general, these
	methods are found to outperform PCA in low dimensions, indicating
	that speech may lie on a manifold nonlinearly embedded in high-dimensional
	space. Phone classification experiments show that Isomap can offer
	improvements over standard features and PCA-transformed features.
	Investigation of magnitude and phase spectrum representations found
	both to have similar low-dimensional structure and confirm that the
	phase spectrum contains useful information for phone discrimination.
	Results indicate that combining magnitude and phase spectrum information
	yields improvements in phone classification tasks. A method to combine
	magnitude and phase spectrum features for increased phone classification
	accuracy without large increases in feature dimensionality is also
	described.},
  OWNER = {ame},
  TIMESTAMP = {2010.04.06},
  URL = {http://doras.dcu.ie/15142/}
}

% Interspeech 2009 paper. BOOKTITLE is a braced literal because no string macro
% named INTERSPEECH2009 is defined in this file; the bare name would expand to
% empty text. MONTH uses the standard three-letter macro.
@INPROCEEDINGS{Errity2009,
  AUTHOR = {A. Errity and J. McKenna},
  TITLE = {A Comparison of Linear and Nonlinear Dimensionality Reduction Methods
	Applied to Synthetic Speech},
  BOOKTITLE = {Proc. of Interspeech 2009},
  YEAR = {2009},
  PAGES = {1095--1098},
  ADDRESS = {Brighton, UK},
  MONTH = sep,
  ABSTRACT = {In this study a number of linear and nonlinear dimensionality reduction
	methods are applied to high dimensional representations of synthetic
	speech to produce corresponding low dimensional embeddings. Several
	important characteristics of the synthetic speech, such as formant
	frequencies and f0, are known and controllable prior to dimensionality
	reduction. The degree to which these characteristics are retained
	after dimensionality reduction is examined in visualisation and classification
	experiments. Results of these experiments indicate that each method
	is capable of discovering meaningful low dimensional representations
	of synthetic speech and that the nonlinear methods may outperform
	linear methods in some cases.},
  OWNER = {ame},
  TIMESTAMP = {2009.06.20}
}

% DSP 2007 conference paper on speaker identification. MONTH uses the standard
% three-letter macro so the bibliography style controls how it is rendered.
@INPROCEEDINGS{Errity2007a,
  AUTHOR = {A. Errity and J. McKenna},
  TITLE = {A comparative study of linear and nonlinear dimensionality reduction
	for speaker identification},
  BOOKTITLE = {Proc. of the 15th Int. Conf. on Digital Signal Processing (DSP)},
  YEAR = {2007},
  PAGES = {587--590},
  ADDRESS = {Cardiff, Wales},
  MONTH = jul,
  ABSTRACT = {In this paper we apply linear and nonlinear dimensionality reduction
	methods to speech produced by a number of different speakers in an
	effort to yield low dimensional features capable of discriminating
	between speakers. The classical linear dimensionality reduction method,
	principal component analysis ({PCA}), and the nonlinear manifold
	learning method, {I}somap, are investigated. The resulting features
	are evaluated in {GMM}-based speaker identification experiments and
	compared to conventional cepstral features. {I}somap is shown to
	give the highest accuracy for very low dimensions, outperforming
	{MFCC}s and {PCA} transformed features. Isomap is shown to be useful
	for visualisation of speaker clusters. For higher dimensions, speaker
	identification results indicate that features resulting from {PCA}
	offer improvements over conventional {MFCC}s.},
  OWNER = {aerrity},
  TIMESTAMP = {2007.04.24},
  URL = {http://ieeexplore.ieee.org/iel5/4288490/4288491/04288650.pdf}
}

% Interspeech 2006 - ICSLP paper on manifold learning for speech analysis.
% MONTH uses the standard three-letter macro; ADDRESS is comma-separated.
@INPROCEEDINGS{Errity2006,
  AUTHOR = {A. Errity and J. McKenna},
  TITLE = {An Investigation of Manifold Learning for Speech Analysis},
  BOOKTITLE = {Proc. of the Int. Conf. on Spoken Language Processing (Interspeech
	2006 - ICSLP)},
  YEAR = {2006},
  PAGES = {2506--2509},
  ADDRESS = {Pittsburgh, PA, USA},
  MONTH = sep,
  ABSTRACT = {Due to the physiological constraints of articulatory motion the speech
	apparatus has limited degrees of freedom. As a result, the range
	of speech sounds a human is capable of producing may lie on a low
	dimensional submanifold of the high dimensional space of all possible
	sounds. In this study a number of manifold learning algorithms are
	applied to speech data in an effort to extract useful low dimensional
	structure from the high dimensional speech signal. The ability of
	these manifold learning algorithms to separate vowels in a low dimensional
	space is evaluated and compared to a classical linear dimensionality
	reduction method. Results indicate that manifold learning algorithms
	outperform classical methods in low dimensions and are capable of
	discovering useful manifold structure in speech data.},
  KEYWORDS = {ISOMAP, LLE, nonlinear dimensionality reduction, speech dimensionality},
  OWNER = {aerrity},
  TIMESTAMP = {2006.06.21},
  URL = {http://www.isca-speech.org/archive/archive_papers/interspeech_2006/i06_1667.pdf}
}

% Interspeech 2004 - ICSLP paper on unscented Kalman filtering of LSFs.
% The proper noun in TITLE is brace-protected as a whole word so styles that
% lowercase titles keep its capital; MONTH uses the standard three-letter macro.
@INPROCEEDINGS{Errity2004,
  AUTHOR = {A. Errity and J. McKenna and S. Isard},
  TITLE = {Unscented {Kalman} Filtering of Line Spectral Frequencies},
  BOOKTITLE = {Proc. of the Int. Conf. on Spoken Language Processing (Interspeech
	2004 - ICSLP)},
  YEAR = {2004},
  PAGES = {2697--2700},
  ADDRESS = {Jeju, Korea},
  MONTH = oct,
  ABSTRACT = {We propose a new method for estimating Line Spectral Frequency (LSF)
	trajectories that uses unscented {K}alman filtering (UKF). This method
	is based upon an iterative Expectation Maximisation (EM) approach
	in which LSF estimates are generated during a forward pass and then
	smoothed during a backward pass. The EM approach also provides re-estimated
	{K}alman filter parameters for further forward-backward passes that
	improve estimation. This approach exploits the non-independence of
	neighbouring spectra. We estimate LSFs as they have good interpolation
	and quantization properties. This allows us to estimate LSF trajectories
	that are guaranteed to result in stable filters. We analyse noisy
	synthetic speech using this technique. The results compare favourably
	with other methods.},
  OWNER = {aerrity},
  TIMESTAMP = {2008.02.09},
  URL = {http://www.isca-speech.org/archive/archive_papers/interspeech_2004/i04_2697.pdf}
}

% Springer LNCS volume 4885 (NOLISP 2007 revised selected papers) version of
% the phone-classification paper. The day range in BOOKTITLE uses an en-dash.
@INPROCEEDINGS{Errity2007Springer,
  AUTHOR = {Andrew Errity and John McKenna and Barry Kirkpatrick},
  TITLE = {Manifold Learning-Based Feature Transformation for Phone Classification},
  BOOKTITLE = {Advances in Nonlinear Speech Processing, International Conference
	on Non-Linear Speech Processing, NOLISP 2007, Paris, France, May
	22--25, 2007, Revised Selected Papers},
  YEAR = {2007},
  EDITOR = {Mohamed Chetouani and Amir Hussain and Bruno Gas and Maurice Milgram
	and Jean-Luc Zarader},
  VOLUME = {4885},
  SERIES = {Lecture Notes in Computer Science},
  PAGES = {132--141},
  PUBLISHER = {Springer},
  ABSTRACT = {This paper investigates approaches for low dimensional speech feature
	transformation using manifold learning. It has recently been shown
	that speech sounds may exist on a low dimensional manifold nonlinearly
	embedded in high dimensional space. A number of techniques have been
	developed in recent years that attempt to discover the geometric
	structure of the underlying low dimensional manifold. The manifold
	learning techniques locally linear embedding and {I}somap are considered
	in this study. The low dimensional feature representations produced
	by these techniques are applied to several phone classification tasks
	on the TIMIT corpus. Classification accuracy is analysed and compared
	to conventional MFCC features and PCA, a linear dimensionality reduction
	method, transformed features. It is shown that features resulting
	from manifold learning are capable of yielding higher classification
	accuracy than these baseline features. The best phone classification
	accuracy in general is demonstrated by feature transformation with
	{I}somap.},
  OWNER = {ame},
  TIMESTAMP = {2008.02.09},
  URL = {http://www.springerlink.com/content/d41l8t1475608612/fulltext.pdf}
}

% NOLISP 2007 workshop-proceedings version of the phone-classification paper
% (pages 43--46). MONTH uses the standard three-letter macro.
@INPROCEEDINGS{Errity2007,
  AUTHOR = {A. Errity and J. McKenna and B. Kirkpatrick},
  TITLE = {Manifold learning-based feature transformation for phone classification},
  BOOKTITLE = {Proc. of the ISCA Tutorial and Research Workshop on Nonlinear Speech
	Processing (NOLISP)},
  YEAR = {2007},
  PAGES = {43--46},
  ADDRESS = {Paris, France},
  MONTH = may,
  ABSTRACT = {This paper investigates approaches for low dimensional speech feature
	transformation using manifold learning. It has recently been shown
	that speech sounds may exist on a low dimensional manifold nonlinearly
	embedded in high dimensional space. A number of techniques have been
	developed in recent years that attempt to discover the geometric
	structure of the underlying low dimensional manifold. The manifold
	learning techniques locally linear embedding and {I}somap are considered
	in this study. The low dimensional feature representations produced
	by these techniques are applied to several phone classification tasks
	on the TIMIT corpus. Classification accuracy is analysed and compared
	to conventional MFCC features and PCA, a linear dimensionality reduction
	method, transformed features. It is shown that features resulting
	from manifold learning are capable of yielding higher classification
	accuracy than these baseline features. The best phone classification
	accuracy in general is demonstrated by feature transformation with
	{I}somap.},
  OWNER = {aerrity},
  TIMESTAMP = {2007.04.24},
  URL = {http://www.isca-speech.org/archive/archive_papers/nolisp07/nol7_039.pdf}
}

% Interspeech 2007 - Eurospeech paper on magnitude and phase derived features.
% MONTH uses the standard three-letter macro.
@INPROCEEDINGS{Errity2007b,
  AUTHOR = {A. Errity and J. McKenna and B. Kirkpatrick},
  TITLE = {Dimensionality reduction methods applied to both magnitude and phase
	derived features},
  BOOKTITLE = {Proc. of Interspeech 2007 - Eurospeech},
  YEAR = {2007},
  PAGES = {1957--1960},
  ADDRESS = {Antwerp, Belgium},
  MONTH = aug,
  ABSTRACT = {A number of previous studies have shown that speech sounds may have
	an intrinsic low dimensional structure. Such studies have focused
	on magnitude-based features ignoring phase information, as is the
	convention in many speech processing applications. In this paper
	dimensionality reduction methods are applied to MFCC and modified
	group delay function (MODGDF) features derived from the magnitude
	and phase spectrum, respectively. The low dimensional structure of
	these representations is examined and a method to combine these features
	is detailed. Results show that both magnitude and phase derived features
	have a low dimensional structure. MFCCs are found to offer higher
	accuracy than MODGDFs in phone classification tasks. Results indicate
	that combining MFCCs and MODGDFs gives improvements for phone classification.
	PCA is shown to be capable of efficiently combining MFCCs and MODGDFs
	for improved classification accuracy without large increases in feature
	dimensionality.},
  KEYWORDS = {manifold learning, dimensionality reduction, phase, modified group
	delay function},
  OWNER = {ame},
  TIMESTAMP = {2007.05.31},
  URL = {http://www.isca-speech.org/archive/archive_papers/interspeech_2007/i07_1957.pdf}
}

% DSP 2007 conference paper on spectral dynamics in concatenative synthesis.
% MONTH uses the standard three-letter macro.
@INPROCEEDINGS{Kirkpatrick2007,
  AUTHOR = {B. Kirkpatrick and D. O'Brien and R. Scaife and A. Errity},
  TITLE = {Spectral dynamics as a source of discontinuity in concatenative speech
	synthesis},
  BOOKTITLE = {Proc. of the 15th Int. Conf. on Digital Signal Processing (DSP)},
  YEAR = {2007},
  PAGES = {615--618},
  ADDRESS = {Cardiff, Wales},
  MONTH = jul,
  ABSTRACT = {The quality of concatenative speech synthesis depends on the cost
	function employed for unit selection. Effective cost functions for
	spectral continuity have proven difficult to define and standard
	measures do not accurately reflect human perception of spectral discontinuity
	in concatenated speech. Previous studies on spectral join costs have
	focused predominantly on static spectral measures extracted from
	the unit boundary. In this paper spectral dynamic behaviour is investigated
	as a source of discontinuity in concatenated speech. A number of
	measures representing spectral dynamics are tested for the task of
	detecting discontinuities. The spectral dynamic measures tested contain
	information correlating with human perception of discontinuities,
	suggesting that spectral dynamics are a source of discontinuity in
	concatenated speech. A strategy to effectively combine dynamic and
	static measures is proposed using principal component analysis (PCA).},
  OWNER = {aerrity},
  TIMESTAMP = {2007.04.24},
  URL = {http://ieeexplore.ieee.org/iel5/4288490/4288491/04288657.pdf}
}

% Interspeech 2007 - Eurospeech paper on spectral dynamics in unit selection
% (no abstract recorded). MONTH uses the standard three-letter macro.
@INPROCEEDINGS{Kirkpatrick2007a,
  AUTHOR = {B. Kirkpatrick and D. O'Brien and R. Scaife and A. Errity},
  TITLE = {On the Role of Spectral Dynamics in Unit Selection Speech Synthesis},
  BOOKTITLE = {Proc. of Interspeech 2007 - Eurospeech},
  YEAR = {2007},
  PAGES = {2889--2892},
  ADDRESS = {Antwerp, Belgium},
  MONTH = aug,
  OWNER = {ame},
  TIMESTAMP = {2007.05.31},
  URL = {http://www.isca-speech.org/archive/archive_papers/interspeech_2007/i07_2889.pdf}
}

@COMMENT{{jabref-meta: selector_publisher:}}
@COMMENT{{jabref-meta: selector_author:}}
@COMMENT{{jabref-meta: selector_journal:}}
@COMMENT{{jabref-meta: selector_keywords:}}

This file has been generated by bibtex2html 1.74