Package rst.audition
Audio signal processing, sometimes referred to as audio processing,
is the intentional alteration of auditory signals, or sound.
This package contains data type definitions related to audio
processing.
Messages
// Overview graph of the data types in package rst.audition.
// Every node is an HTML-like table: the header row names the type, and each
// following row pairs a field's type with the field's name (for enums: a
// value's name with its number). Field-name cells carry a PORT so that edges
// can start at that specific field.
digraph message_graph {
// Graph-wide font settings and the stylesheet referenced by the rendered output.
fontname="Arial";
fontsize=11;
stylesheet="../_static/graphs.css";
// Default font settings applied to all nodes and edges.
node [fontsize=11,fontname="Arial"]
edge [fontsize=11,fontname="Arial"]
// Node "5": structure Utterance (fields: phonemes, audio, textual_representation).
"5" [label=<<TABLE BORDER="0"><TR><TD COLSPAN="2" HREF="../generated/stable/package-rst-audition.html#rst.audition.Utterance" TITLE="Structure rst.audition.Utterance" TARGET="_parent"><TABLE BORDER="0"><TR><TD ALIGN="right"><IMG SRC="../_static/message.svg"></IMG></TD><TD ALIGN="left">Utterance</TD></TR></TABLE></TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.PhonemeCollection" TITLE="Structure rst.audition.PhonemeCollection" TARGET="_parent">PhonemeCollection</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.Utterance.phonemes" TITLE="Field rst.audition.Utterance.phonemes" TARGET="_parent" PORT="phonemes">phonemes</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk" TITLE="Structure rst.audition.SoundChunk" TARGET="_parent">SoundChunk</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.Utterance.audio" TITLE="Field rst.audition.Utterance.audio" TARGET="_parent" PORT="audio">audio</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-__rosetta-ASCII-STRING.html#ASCII-STRING" TITLE="Fundamental ASCII-STRING" TARGET="_parent">ASCII-STRING</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.Utterance.textual_representation" TITLE="Field rst.audition.Utterance.textual_representation" TARGET="_parent" PORT="textual_representation">textual_representation</TD></TR></TABLE>>,shape=box,style=filled,fillcolor="white"];
// Node "6": structure PhonemeCollection (field: element).
"6" [label=<<TABLE BORDER="0"><TR><TD COLSPAN="2" HREF="../generated/stable/package-rst-audition.html#rst.audition.PhonemeCollection" TITLE="Structure rst.audition.PhonemeCollection" TARGET="_parent"><TABLE BORDER="0"><TR><TD ALIGN="right"><IMG SRC="../_static/message.svg"></IMG></TD><TD ALIGN="left">PhonemeCollection</TD></TR></TABLE></TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.Phoneme" TITLE="Structure rst.audition.Phoneme" TARGET="_parent">Phoneme</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.PhonemeCollection.element" TITLE="Field rst.audition.PhonemeCollection.element" TARGET="_parent" PORT="element">element</TD></TR></TABLE>>,shape=box,style=filled,fillcolor="white"];
// Node "7": structure Phoneme (fields: symbol, duration).
"7" [label=<<TABLE BORDER="0"><TR><TD COLSPAN="2" HREF="../generated/stable/package-rst-audition.html#rst.audition.Phoneme" TITLE="Structure rst.audition.Phoneme" TARGET="_parent"><TABLE BORDER="0"><TR><TD ALIGN="right"><IMG SRC="../_static/message.svg"></IMG></TD><TD ALIGN="left">Phoneme</TD></TR></TABLE></TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-__rosetta-ASCII-STRING.html#ASCII-STRING" TITLE="Fundamental ASCII-STRING" TARGET="_parent">ASCII-STRING</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.Phoneme.symbol" TITLE="Field rst.audition.Phoneme.symbol" TARGET="_parent" PORT="symbol">symbol</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-__rosetta-UINT32.html#UINT32" TITLE="Fundamental UINT32" TARGET="_parent">UINT32</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.Phoneme.duration" TITLE="Field rst.audition.Phoneme.duration" TARGET="_parent" PORT="duration">duration</TD></TR></TABLE>>,shape=box,style=filled,fillcolor="white"];
// Node "1": structure SoundChunkCollection (field: element).
"1" [label=<<TABLE BORDER="0"><TR><TD COLSPAN="2" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunkCollection" TITLE="Structure rst.audition.SoundChunkCollection" TARGET="_parent"><TABLE BORDER="0"><TR><TD ALIGN="right"><IMG SRC="../_static/message.svg"></IMG></TD><TD ALIGN="left">SoundChunkCollection</TD></TR></TABLE></TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk" TITLE="Structure rst.audition.SoundChunk" TARGET="_parent">SoundChunk</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunkCollection.element" TITLE="Field rst.audition.SoundChunkCollection.element" TARGET="_parent" PORT="element">element</TD></TR></TABLE>>,shape=box,style=filled,fillcolor="white"];
// Node "2": structure SoundChunk (fields: data, sample_count, channels, rate,
// sample_type, endianness). The statement continues on the following line.
"2" [label=<<TABLE BORDER="0"><TR><TD COLSPAN="2" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk" TITLE="Structure rst.audition.SoundChunk" TARGET="_parent"><TABLE BORDER="0"><TR><TD ALIGN="right"><IMG SRC="../_static/message.svg"></IMG></TD><TD ALIGN="left">SoundChunk</TD></TR></TABLE></TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-__rosetta-OCTET-VECTOR.html#OCTET-VECTOR" TITLE="Fundamental OCTET-VECTOR" TARGET="_parent">OCTET-VECTOR</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.data" TITLE="Field rst.audition.SoundChunk.data" TARGET="_parent" PORT="data">data</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-__rosetta-UINT32.html#UINT32" TITLE="Fundamental UINT32" TARGET="_parent">UINT32</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.sample_count" TITLE="Field rst.audition.SoundChunk.sample_count" TARGET="_parent" PORT="sample_count">sample_count</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-__rosetta-UINT32.html#UINT32" TITLE="Fundamental UINT32" TARGET="_parent">UINT32</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.channels" TITLE="Field rst.audition.SoundChunk.channels" TARGET="_parent" PORT="channels">channels</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-__rosetta-UINT32.html#UINT32" TITLE="Fundamental UINT32" TARGET="_parent">UINT32</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.rate" TITLE="Field rst.audition.SoundChunk.rate" TARGET="_parent" PORT="rate">rate</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType" TITLE="Enum rst.audition.SoundChunk.SampleType" TARGET="_parent">SampleType</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.sample_type" 
TITLE="Field rst.audition.SoundChunk.sample_type" TARGET="_parent" PORT="sample_type">sample_type</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.EndianNess" TITLE="Enum rst.audition.SoundChunk.EndianNess" TARGET="_parent">EndianNess</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.endianness" TITLE="Field rst.audition.SoundChunk.endianness" TARGET="_parent" PORT="endianness">endianness</TD></TR></TABLE>>,shape=box,style=filled,fillcolor="white"];
// Node "4": enum SoundChunk.EndianNess (ENDIAN_LITTLE = 0, ENDIAN_BIG = 1).
"4" [label=<<TABLE BORDER="0"><TR><TD COLSPAN="2" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.EndianNess" TITLE="Enum rst.audition.SoundChunk.EndianNess" TARGET="_parent"><TABLE BORDER="0"><TR><TD ALIGN="right"><IMG SRC="../_static/enum.svg"></IMG></TD><TD ALIGN="left">EndianNess</TD></TR></TABLE></TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.EndianNess.ENDIAN_LITTLE" TITLE="Value rst.audition.SoundChunk.EndianNess.ENDIAN_LITTLE" TARGET="_parent">ENDIAN_LITTLE</TD><TD ALIGN="right" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.EndianNess.ENDIAN_LITTLE" TITLE="Value rst.audition.SoundChunk.EndianNess.ENDIAN_LITTLE" TARGET="_parent">0</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.EndianNess.ENDIAN_BIG" TITLE="Value rst.audition.SoundChunk.EndianNess.ENDIAN_BIG" TARGET="_parent">ENDIAN_BIG</TD><TD ALIGN="right" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.EndianNess.ENDIAN_BIG" TITLE="Value rst.audition.SoundChunk.EndianNess.ENDIAN_BIG" TARGET="_parent">1</TD></TR></TABLE>>,shape=box,style=filled,fillcolor="white"];
// Node "3": enum SoundChunk.SampleType (SAMPLE_S8 = 0, SAMPLE_U8 = 1,
// SAMPLE_S16 = 2, SAMPLE_U16 = 4, SAMPLE_S24 = 8, SAMPLE_U24 = 16).
// The statement continues on the following line.
"3" [label=<<TABLE BORDER="0"><TR><TD COLSPAN="2" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType" TITLE="Enum rst.audition.SoundChunk.SampleType" TARGET="_parent"><TABLE BORDER="0"><TR><TD ALIGN="right"><IMG SRC="../_static/enum.svg"></IMG></TD><TD ALIGN="left">SampleType</TD></TR></TABLE></TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_S8" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_S8" TARGET="_parent">SAMPLE_S8</TD><TD ALIGN="right" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_S8" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_S8" TARGET="_parent">0</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_U8" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_U8" TARGET="_parent">SAMPLE_U8</TD><TD ALIGN="right" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_U8" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_U8" TARGET="_parent">1</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_S16" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_S16" TARGET="_parent">SAMPLE_S16</TD><TD ALIGN="right" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_S16" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_S16" TARGET="_parent">2</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_U16" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_U16" TARGET="_parent">SAMPLE_U16</TD><TD ALIGN="right" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_U16" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_U16" 
TARGET="_parent">4</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_S24" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_S24" TARGET="_parent">SAMPLE_S24</TD><TD ALIGN="right" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_S24" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_S24" TARGET="_parent">8</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_U24" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_U24" TARGET="_parent">SAMPLE_U24</TD><TD ALIGN="right" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_U24" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_U24" TARGET="_parent">16</TD></TR></TABLE>>,shape=box,style=filled,fillcolor="white"];
// Field-to-type edges: each starts at a field's port and points to the node
// of the structure used as that field's type.
"5":audio -> "2" [];
"5":phonemes -> "6" [];
"6":element -> "7" [];
"1":element -> "2" [];
// Port-less edges from SoundChunk to its two enums.
// NOTE(review): presumably these express that the enums are nested inside
// SoundChunk — confirm against the graph generator.
"2" -> "4" [];
"2" -> "3" [];
// Field-to-type edges for the enum-typed fields of SoundChunk.
"2":endianness -> "4" [];
"2":sample_type -> "3" [];
}
clearer: should be made invisible via css
Message SoundChunkCollection
class rst.audition.
SoundChunkCollection
Collection of SoundChunk
instances.
Auto-generated.
element
The individual elements of the collection.
Constraints regarding the empty collection, sorting, duplicated
entries etc. are use case specific.
Download this file
/**
 * Auto-generated collection of SoundChunk instances.
 */
message SoundChunkCollection {

  /**
   * The elements that make up this collection.
   *
   * Whether the collection may be empty, has to be sorted, may hold
   * duplicated entries etc. depends on the individual use case.
   */
  repeated SoundChunk element = 1;

}
Message Utterance
class rst.audition.
Utterance
Objects of this type represent a single utterance of speech.
The data describes a single utterance in three different forms:
phonemes
describes the utterance as a list of phone symbols
and durations (useful e.g. for lip animation).
audio
is a SoundChunk that can be played back on audio
devices containing the realization (e.g. by a TTS system)
of the included phoneme list
textual_representation is a textual description of the utterance for
debugging purposes.
Code author: Simon Schulz <sschulz@ techfak. uni-bielefeld. de >
phonemes
A collection of phonemes. Will be played back in the same
ordering as given by Phoneme
audio
A chunk of audio data that can be played back containing the
realization (e.g. by a TTS system) of the included phoneme list
textual_representation
Textual representation of the utterance.
Download this file
/**
 * A single utterance of speech, given as a phoneme list, as playable
 * audio and as text.
 */
message Utterance {

  /**
   * The phonemes of the utterance. Playback follows the ordering
   * given by @ref .Phoneme
   */
  required PhonemeCollection phonemes = 1;

  /**
   * Playable audio data holding the realization (e.g. produced by a
   * TTS system) of the phoneme list included in this message.
   */
  required SoundChunk audio = 2;

  /**
   * The utterance in textual form.
   */
  required string textual_representation = 3;

}
Message SoundChunk
class rst.audition.
SoundChunk
Constraint : len(.data) == 8 * .channels * .sample_count * TODO(.sample_type)
Objects of this type represent a chunk of an audio stream.
The audio information for one or more channels
is stored in
data
as a sequence of sample_count
encoded samples, the
encoding of which is described by endianness
and sample_type
.
Depending on the sample rate (rate
), such a chunk of audio
corresponds to a certain amount of time during which its samples
have been recorded.
Interpretation of RSB timestamps:
create:
Capture time of the audio buffer. More precisely, the
timestamp should correspond to the first sample contained
in the buffer.
Code author: David Klotz <dklotz@ techfak. uni-bielefeld. de >
@create_collection
data
The sequences of bytes representing the samples of this sound
chunk.
The value of this field must be interpreted according to the
values of the sample_count
, channels
, sample_type
and endianness
fields.
sample_count
Unit : number
The number of samples contained in data
.
channels
Unit : number
The number of channels for which samples are stored in data
.
rate
Unit : hz
The rate with which the samples stored in data
have been
recorded or should be played.
sample_type
The data type used for the representation of samples in data
.
endianness
The Endianness used for the representation of samples in data
.
Download this file
/**
 * A chunk of an audio stream: raw sample bytes plus the metadata
 * (sample count, channels, rate, sample type, byte-order) needed to
 * interpret them.
 */
message SoundChunk {

  /**
   * Data types that can be used to represent a single sample.
   *
   * The numeric values are not consecutive (0, 1, 2, 4, 8, 16); they
   * are part of the wire format and must be kept as they are.
   */
  enum SampleType {

    /**
     * 8-bit samples, signed.
     */
    SAMPLE_S8 = 0;

    /**
     * 8-bit samples, unsigned.
     */
    SAMPLE_U8 = 1;

    /**
     * 16-bit samples, signed.
     */
    SAMPLE_S16 = 2;

    /**
     * 16-bit samples, unsigned.
     */
    SAMPLE_U16 = 4;

    /**
     * 24-bit samples, signed.
     */
    SAMPLE_S24 = 8;

    /**
     * 24-bit samples, unsigned.
     */
    SAMPLE_U24 = 16;

  }

  /**
   * Byte-orders that can be used to represent samples.
   */
  enum EndianNess {

    /**
     * Little Endian byte-order is used for the samples.
     */
    ENDIAN_LITTLE = 0;

    /**
     * Big Endian byte-order is used for the samples.
     */
    ENDIAN_BIG = 1;

  }

  /**
   * The byte sequence that holds the samples of this sound chunk.
   *
   * How these bytes have to be interpreted is determined by the
   * values of the @ref .sample_count, @ref .channels, @ref
   * .sample_type and @ref .endianness fields.
   */
  required bytes data = 1;

  /**
   * How many samples @ref .data contains.
   */
  // @unit(number)
  required uint32 sample_count = 2;

  /**
   * How many channels have samples stored in @ref .data.
   * Defaults to 1.
   */
  // @unit(number)
  optional uint32 channels = 3 [default = 1];

  /**
   * The rate with which the samples stored in @ref .data have been
   * recorded or should be played. Defaults to 44100.
   */
  // @unit(hz)
  optional uint32 rate = 4 [default = 44100];

  /**
   * The data type with which samples are represented in @ref .data.
   * Defaults to SAMPLE_S16.
   */
  optional SampleType sample_type = 5 [default = SAMPLE_S16];

  /**
   * The byte-order with which samples are represented in @ref .data.
   * Defaults to ENDIAN_LITTLE.
   */
  optional EndianNess endianness = 6 [default = ENDIAN_LITTLE];

  // TODO: interleaving type?

}
Enum SampleType
class rst.audition.SoundChunk.
SampleType
The possible data types for representing individual samples.
SAMPLE_S8
= 0
Signed 8-bit samples.
SAMPLE_U8
= 1
Unsigned 8-bit samples.
SAMPLE_S16
= 2
Signed 16-bit samples.
SAMPLE_U16
= 4
Unsigned 16-bit samples.
SAMPLE_S24
= 8
Signed 24-bit samples.
SAMPLE_U24
= 16
Unsigned 24-bit samples.
Download this file
/**
 * Data types that can be used to represent a single sample.
 *
 * The numeric values are not consecutive (0, 1, 2, 4, 8, 16); they
 * are part of the wire format and must be kept as they are.
 */
enum SampleType {

  /**
   * 8-bit samples, signed.
   */
  SAMPLE_S8 = 0;

  /**
   * 8-bit samples, unsigned.
   */
  SAMPLE_U8 = 1;

  /**
   * 16-bit samples, signed.
   */
  SAMPLE_S16 = 2;

  /**
   * 16-bit samples, unsigned.
   */
  SAMPLE_U16 = 4;

  /**
   * 24-bit samples, signed.
   */
  SAMPLE_S24 = 8;

  /**
   * 24-bit samples, unsigned.
   */
  SAMPLE_U24 = 16;

}
Enum EndianNess
class rst.audition.SoundChunk.
EndianNess
The possible byte-orders for representing samples.
ENDIAN_LITTLE
= 0
Samples are represented with little Endian byte-order.
ENDIAN_BIG
= 1
Samples are represented with big Endian byte-order.
Download this file
/**
 * Byte-orders that can be used to represent samples.
 */
enum EndianNess {

  /**
   * Little Endian byte-order is used for the samples.
   */
  ENDIAN_LITTLE = 0;

  /**
   * Big Endian byte-order is used for the samples.
   */
  ENDIAN_BIG = 1;

}
Message PhonemeCollection
class rst.audition.
PhonemeCollection
Collection of Phoneme
instances.
Auto-generated.
element
The individual elements of the collection.
Constraints regarding the empty collection, sorting, duplicated
entries etc. are use case specific.
Download this file
/**
 * Auto-generated collection of Phoneme instances.
 */
message PhonemeCollection {

  /**
   * The elements that make up this collection.
   *
   * Whether the collection may be empty, has to be sorted, may hold
   * duplicated entries etc. depends on the individual use case.
   */
  repeated Phoneme element = 1;

}
Message Phoneme
Download this file
/**
 * A phone symbol together with its duration.
 */
message Phoneme {

  /**
   * One phone symbol (for example aI, E, C, R, _, ...).
   *
   * See e.g. https://en.wikipedia.org/wiki/Phoneme
   * or, for German examples,
   * http://www.phon.ucl.ac.uk/home/sampa/german.htm
   */
  required string symbol = 1;

  /**
   * How long this symbol lasts.
   */
  // @unit(millisecond)
  required uint32 duration = 2;

}