% Errity2009: Interspeech 2009 conference paper. MONTH uses the predefined, unquoted macro so the bibliography style decides how it is rendered.
@INPROCEEDINGS{Errity2009,
AUTHOR = {A. Errity and J. McKenna},
TITLE = {A Comparison of Linear and Nonlinear Dimensionality Reduction Methods
Applied to Synthetic Speech},
BOOKTITLE = {Proc. of Interspeech 2009 - Eurospeech},
YEAR = {2009},
ADDRESS = {Brighton, UK},
MONTH = sep,
PAGES = {1095--1098},
ABSTRACT = {In this study a number of linear and nonlinear dimensionality reduction
methods are applied to high dimensional representations of synthetic
speech to produce corresponding low dimensional embeddings. Several
important characteristics of the synthetic speech, such as formant
frequencies and f0, are known and controllable prior to dimensionality
reduction. The degree to which these characteristics are retained
after dimensionality reduction is examined in visualisation and classification
experiments. Results of these experiments indicate that each method
is capable of discovering meaningful low dimensional representations
of synthetic speech and that the nonlinear methods may outperform
linear methods in some cases.},
OWNER = {ame},
TIMESTAMP = {2009.06.20}
}
% Errity2007a: DSP 2007 conference paper on speaker identification. MONTH uses the predefined, unquoted macro so the bibliography style decides how it is rendered.
@INPROCEEDINGS{Errity2007a,
AUTHOR = {A. Errity and J. McKenna},
TITLE = {A comparative study of linear and nonlinear dimensionality reduction
for speaker identification},
BOOKTITLE = {Proc. of the 15th Int. Conf. on Digital Signal Processing (DSP)},
YEAR = {2007},
PAGES = {587--590},
ADDRESS = {Cardiff, Wales},
MONTH = jul,
ABSTRACT = {In this paper we apply linear and nonlinear dimensionality reduction
methods to speech produced by a number of different speakers in an
effort to yield low dimensional features capable of discriminating
between speakers. The classical linear dimensionality reduction method,
principal component analysis ({PCA}), and the nonlinear manifold
learning method, {I}somap, are investigated. The resulting features
are evaluated in {GMM}-based speaker identification experiments and
compared to conventional cepstral features. {I}somap is shown to
give the highest accuracy for very low dimensions, outperforming
{MFCC}s and {PCA} transformed features. Isomap is shown to be useful
for visualisation of speaker clusters. For higher dimensions, speaker
identification results indicate that features resulting from {PCA}
offer improvements over conventional {MFCC}s.},
OWNER = {aerrity},
TIMESTAMP = {2007.04.24},
URL = {http://ieeexplore.ieee.org/iel5/4288490/4288491/04288650.pdf}
}
% Errity2006: Interspeech 2006 - ICSLP conference paper. MONTH uses the predefined, unquoted macro; ADDRESS separates city and state with a comma.
@INPROCEEDINGS{Errity2006,
AUTHOR = {A. Errity and J. McKenna},
TITLE = {An Investigation of Manifold Learning for Speech Analysis},
BOOKTITLE = {Proc. of the Int. Conf. on Spoken Language Processing (Interspeech
2006 - ICSLP)},
YEAR = {2006},
PAGES = {2506--2509},
ADDRESS = {Pittsburgh, PA, USA},
MONTH = sep,
ABSTRACT = {Due to the physiological constraints of articulatory motion the speech
apparatus has limited degrees of freedom. As a result, the range
of speech sounds a human is capable of producing may lie on a low
dimensional submanifold of the high dimensional space of all possible
sounds. In this study a number of manifold learning algorithms are
applied to speech data in an effort to extract useful low dimensional
structure from the high dimensional speech signal. The ability of
these manifold learning algorithms to separate vowels in a low dimensional
space is evaluated and compared to a classical linear dimensionality
reduction method. Results indicate that manifold learning algorithms
outperform classical methods in low dimensions and are capable of
discovering useful manifold structure in speech data.},
KEYWORDS = {ISOMAP, LLE, nonlinear dimensionality reduction, speech dimensionality},
OWNER = {aerrity},
TIMESTAMP = {2006.06.21},
URL = {http://www.isca-speech.org/archive/archive_papers/interspeech_2006/i06_1667.pdf}
}
% Errity2004: Interspeech 2004 - ICSLP conference paper. The proper noun in TITLE is brace-protected as a whole word; MONTH uses the predefined, unquoted macro.
@INPROCEEDINGS{Errity2004,
AUTHOR = {A. Errity and J. McKenna and S. Isard},
TITLE = {Unscented {Kalman} Filtering of Line Spectral Frequencies},
BOOKTITLE = {Proc. of the Int. Conf. on Spoken Language Processing (Interspeech
2004 - ICSLP)},
YEAR = {2004},
PAGES = {2697--2700},
ADDRESS = {Jeju, Korea},
MONTH = oct,
ABSTRACT = {We propose a new method for estimating Line Spectral Frequency (LSF)
trajectories that uses unscented {K}alman filtering (UKF). This method
is based upon an iterative Expectation Maximisation (EM) approach
in which LSF estimates are generated during a forward pass and then
smoothed during a backward pass. The EM approach also provides re-estimated
{K}alman filter parameters for further forward-backward passes that
improve estimation. This approach exploits the non-independence of
neighbouring spectra. We estimate LSFs as they have good interpolation
and quantization properties. This allows us to estimate LSF trajectories
that are guaranteed to result in stable filters. We analyse noisy
synthetic speech using this technique. The results compare favourably
with other methods.},
OWNER = {aerrity},
TIMESTAMP = {2008.02.09},
URL = {http://www.isca-speech.org/archive/archive_papers/interspeech_2004/i04_2697.pdf}
}
% Errity2007Springer: paper in the NOLISP 2007 Revised Selected Papers volume (LNCS 4885). The workshop paper with the same title is the separate entry Errity2007. The day range in BOOKTITLE uses a double hyphen so it typesets as an en dash.
@INPROCEEDINGS{Errity2007Springer,
AUTHOR = {Andrew Errity and John McKenna and Barry Kirkpatrick},
TITLE = {Manifold Learning-Based Feature Transformation for Phone Classification},
BOOKTITLE = {Advances in Nonlinear Speech Processing, International Conference
on Non-Linear Speech Processing, NOLISP 2007, Paris, France, May
22--25, 2007, Revised Selected Papers},
YEAR = {2007},
EDITOR = {Mohamed Chetouani and Amir Hussain and Bruno Gas and Maurice Milgram
and Jean-Luc Zarader},
VOLUME = {4885},
SERIES = {Lecture Notes in Computer Science},
PAGES = {132--141},
PUBLISHER = {Springer},
ABSTRACT = {This paper investigates approaches for low dimensional speech feature
transformation using manifold learning. It has recently been shown
that speech sounds may exist on a low dimensional manifold nonlinearly
embedded in high dimensional space. A number of techniques have been
developed in recent years that attempt to discover the geometric
structure of the underlying low dimensional manifold. The manifold
learning techniques locally linear embedding and {I}somap are considered
in this study. The low dimensional feature representations produced
by these techniques are applied to several phone classification tasks
on the TIMIT corpus. Classification accuracy is analysed and compared
to conventional MFCC features and PCA, a linear dimensionality reduction
method, transformed features. It is shown that features resulting
from manifold learning are capable of yielding higher classification
accuracy than these baseline features. The best phone classification
accuracy in general is demonstrated by feature transformation with
{I}somap.},
OWNER = {ame},
TIMESTAMP = {2008.02.09},
URL = {http://www.springerlink.com/content/d41l8t1475608612/fulltext.pdf}
}
% Errity2007: NOLISP 2007 workshop paper. The version in the Revised Selected Papers volume is the separate entry Errity2007Springer. MONTH uses the predefined, unquoted macro.
@INPROCEEDINGS{Errity2007,
AUTHOR = {A. Errity and J. McKenna and B. Kirkpatrick},
TITLE = {Manifold learning-based feature transformation for phone classification},
BOOKTITLE = {Proc. of the ISCA Tutorial and Research Workshop on Nonlinear Speech
Processing (NOLISP)},
YEAR = {2007},
PAGES = {43--46},
ADDRESS = {Paris, France},
MONTH = may,
ABSTRACT = {This paper investigates approaches for low dimensional speech feature
transformation using manifold learning. It has recently been shown
that speech sounds may exist on a low dimensional manifold nonlinearly
embedded in high dimensional space. A number of techniques have been
developed in recent years that attempt to discover the geometric
structure of the underlying low dimensional manifold. The manifold
learning techniques locally linear embedding and {I}somap are considered
in this study. The low dimensional feature representations produced
by these techniques are applied to several phone classification tasks
on the TIMIT corpus. Classification accuracy is analysed and compared
to conventional MFCC features and PCA, a linear dimensionality reduction
method, transformed features. It is shown that features resulting
from manifold learning are capable of yielding higher classification
accuracy than these baseline features. The best phone classification
accuracy in general is demonstrated by feature transformation with
{I}somap.},
OWNER = {aerrity},
TIMESTAMP = {2007.04.24},
URL = {http://www.isca-speech.org/archive/archive_papers/nolisp07/nol7_039.pdf}
}
% Errity2007b: Interspeech 2007 conference paper on magnitude and phase derived features. MONTH uses the predefined, unquoted macro so the bibliography style decides how it is rendered.
@INPROCEEDINGS{Errity2007b,
AUTHOR = {A. Errity and J. McKenna and B. Kirkpatrick},
TITLE = {Dimensionality reduction methods applied to both magnitude and phase
derived features},
BOOKTITLE = {Proc. of Interspeech 2007 - Eurospeech},
YEAR = {2007},
PAGES = {1957--1960},
ADDRESS = {Antwerp, Belgium},
MONTH = aug,
ABSTRACT = {A number of previous studies have shown that speech sounds may have
an intrinsic low dimensional structure. Such studies have focused
on magnitude-based features ignoring phase information, as is the
convention in many speech processing applications. In this paper
dimensionality reduction methods are applied to MFCC and modified
group delay function (MODGDF) features derived from the magnitude
and phase spectrum, respectively. The low dimensional structure of
these representations is examined and a method to combine these features
is detailed. Results show that both magnitude and phase derived features
have a low dimensional structure. MFCCs are found to offer higher
accuracy than MODGDFs in phone classification tasks. Results indicate
that combining MFCCs and MODGDFs gives improvements for phone classification.
PCA is shown to be capable of efficiently combining MFCCs and MODGDFs
for improved classification accuracy without large increases in feature
dimensionality.},
KEYWORDS = {manifold learning, dimensionality reduction, phase, modified group
delay function},
OWNER = {ame},
TIMESTAMP = {2007.05.31}
}
% Kirkpatrick2007: DSP 2007 conference paper on spectral dynamics in concatenative synthesis. MONTH uses the predefined, unquoted macro so the bibliography style decides how it is rendered.
@INPROCEEDINGS{Kirkpatrick2007,
AUTHOR = {B. Kirkpatrick and D. O'Brien and R. Scaife and A. Errity},
TITLE = {Spectral dynamics as a source of discontinuity in concatenative speech
synthesis},
BOOKTITLE = {Proc. of the 15th Int. Conf. on Digital Signal Processing (DSP)},
YEAR = {2007},
PAGES = {615--618},
ADDRESS = {Cardiff, Wales},
MONTH = jul,
ABSTRACT = {The quality of concatenative speech synthesis depends on the cost
function employed for unit selection. Effective cost functions for
spectral continuity have proven difficult to define and standard
measures do not accurately reflect human perception of spectral discontinuity
in concatenated speech. Previous studies on spectral join costs have
focused predominantly on static spectral measures extracted from
the unit boundary. In this paper spectral dynamic behaviour is investigated
as a source of discontinuity in concatenated speech. A number of
measures representing spectral dynamics are tested for the task of
detecting discontinuities. The spectral dynamic measures tested contain
information correlating with human perception of discontinuities,
suggesting that spectral dynamics are a source of discontinuity in
concatenated speech. A strategy to effectively combine dynamic and
static measures is proposed using principal component analysis (PCA).},
OWNER = {aerrity},
TIMESTAMP = {2007.04.24},
URL = {http://ieeexplore.ieee.org/iel5/4288490/4288491/04288657.pdf}
}
% Kirkpatrick2007a: Interspeech 2007 conference paper on unit selection synthesis. MONTH uses the predefined, unquoted macro so the bibliography style decides how it is rendered.
@INPROCEEDINGS{Kirkpatrick2007a,
AUTHOR = {B. Kirkpatrick and D. O'Brien and R. Scaife and A. Errity},
TITLE = {On the Role of Spectral Dynamics in Unit Selection Speech Synthesis},
BOOKTITLE = {Proc. of Interspeech 2007 - Eurospeech},
YEAR = {2007},
PAGES = {2889--2892},
ADDRESS = {Antwerp, Belgium},
MONTH = aug,
OWNER = {ame},
TIMESTAMP = {2007.05.31}
}
@COMMENT{{jabref-meta: selector_publisher:}}
@COMMENT{{jabref-meta: selector_author:}}
@COMMENT{{jabref-meta: selector_journal:}}
@COMMENT{{jabref-meta: selector_keywords:}}
This file has been generated by bibtex2html 1.74