Package rst.audition

Not documented

Messages

digraph message_graph { fontname="Arial"; fontsize=11; stylesheet="../_static/graphs.css"; node [fontsize=11,fontname="Arial"] edge [fontsize=11,fontname="Arial"] "1" [label=<<TABLE BORDER="0"><TR><TD COLSPAN="2" HREF="../generated/sandbox/package-rst-audition.html#rst.audition.Utterance" TITLE="Structure rst.audition.Utterance" TARGET="_parent"><TABLE BORDER="0"><TR><TD ALIGN="right"><IMG SRC="../_static/message-sandbox.svg"></IMG></TD><TD ALIGN="left">Utterance</TD></TR></TABLE></TD></TR><TR><TD ALIGN="left" HREF="../generated/sandbox/package-rst-audition.html#rst.audition.PhonemeCollection" TITLE="Structure rst.audition.PhonemeCollection" TARGET="_parent">PhonemeCollection</TD><TD ALIGN="left" HREF="../generated/sandbox/package-rst-audition.html#rst.audition.Utterance.phonemes" TITLE="Field rst.audition.Utterance.phonemes" TARGET="_parent" PORT="phonemes">phonemes</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk" TITLE="Structure rst.audition.SoundChunk" TARGET="_parent">SoundChunk</TD><TD ALIGN="left" HREF="../generated/sandbox/package-rst-audition.html#rst.audition.Utterance.audio" TITLE="Field rst.audition.Utterance.audio" TARGET="_parent" PORT="audio">audio</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-__rosetta-ASCII-STRING.html#ASCII-STRING" TITLE="Fundamental ASCII-STRING" TARGET="_parent">ASCII-STRING</TD><TD ALIGN="left" HREF="../generated/sandbox/package-rst-audition.html#rst.audition.Utterance.textual_representation" TITLE="Field rst.audition.Utterance.textual_representation" TARGET="_parent" PORT="textual_representation">textual_representation</TD></TR></TABLE>>,shape=box,style=filled,fillcolor="white"]; "4" [label=<<TABLE BORDER="0"><TR><TD COLSPAN="2" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk" TITLE="Structure rst.audition.SoundChunk" TARGET="_parent"><TABLE BORDER="0"><TR><TD ALIGN="right"><IMG SRC="../_static/message.svg"></IMG></TD><TD 
ALIGN="left">SoundChunk</TD></TR></TABLE></TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-__rosetta-OCTET-VECTOR.html#OCTET-VECTOR" TITLE="Fundamental OCTET-VECTOR" TARGET="_parent">OCTET-VECTOR</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.data" TITLE="Field rst.audition.SoundChunk.data" TARGET="_parent" PORT="data">data</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-__rosetta-UINT32.html#UINT32" TITLE="Fundamental UINT32" TARGET="_parent">UINT32</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.sample_count" TITLE="Field rst.audition.SoundChunk.sample_count" TARGET="_parent" PORT="sample_count">sample_count</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-__rosetta-UINT32.html#UINT32" TITLE="Fundamental UINT32" TARGET="_parent">UINT32</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.channels" TITLE="Field rst.audition.SoundChunk.channels" TARGET="_parent" PORT="channels">channels</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-__rosetta-UINT32.html#UINT32" TITLE="Fundamental UINT32" TARGET="_parent">UINT32</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.rate" TITLE="Field rst.audition.SoundChunk.rate" TARGET="_parent" PORT="rate">rate</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType" TITLE="Enum rst.audition.SoundChunk.SampleType" TARGET="_parent">SampleType</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.sample_type" TITLE="Field rst.audition.SoundChunk.sample_type" TARGET="_parent" PORT="sample_type">sample_type</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.EndianNess" TITLE="Enum rst.audition.SoundChunk.EndianNess" 
TARGET="_parent">EndianNess</TD><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.endianness" TITLE="Field rst.audition.SoundChunk.endianness" TARGET="_parent" PORT="endianness">endianness</TD></TR></TABLE>>,shape=box,style=filled,fillcolor="white"]; "6" [label=<<TABLE BORDER="0"><TR><TD COLSPAN="2" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.EndianNess" TITLE="Enum rst.audition.SoundChunk.EndianNess" TARGET="_parent"><TABLE BORDER="0"><TR><TD ALIGN="right"><IMG SRC="../_static/enum.svg"></IMG></TD><TD ALIGN="left">EndianNess</TD></TR></TABLE></TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.EndianNess.ENDIAN_LITTLE" TITLE="Value rst.audition.SoundChunk.EndianNess.ENDIAN_LITTLE" TARGET="_parent">ENDIAN_LITTLE</TD><TD ALIGN="right" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.EndianNess.ENDIAN_LITTLE" TITLE="Value rst.audition.SoundChunk.EndianNess.ENDIAN_LITTLE" TARGET="_parent">0</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.EndianNess.ENDIAN_BIG" TITLE="Value rst.audition.SoundChunk.EndianNess.ENDIAN_BIG" TARGET="_parent">ENDIAN_BIG</TD><TD ALIGN="right" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.EndianNess.ENDIAN_BIG" TITLE="Value rst.audition.SoundChunk.EndianNess.ENDIAN_BIG" TARGET="_parent">1</TD></TR></TABLE>>,shape=box,style=filled,fillcolor="white"]; "5" [label=<<TABLE BORDER="0"><TR><TD COLSPAN="2" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType" TITLE="Enum rst.audition.SoundChunk.SampleType" TARGET="_parent"><TABLE BORDER="0"><TR><TD ALIGN="right"><IMG SRC="../_static/enum.svg"></IMG></TD><TD ALIGN="left">SampleType</TD></TR></TABLE></TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_S8" 
TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_S8" TARGET="_parent">SAMPLE_S8</TD><TD ALIGN="right" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_S8" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_S8" TARGET="_parent">0</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_U8" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_U8" TARGET="_parent">SAMPLE_U8</TD><TD ALIGN="right" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_U8" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_U8" TARGET="_parent">1</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_S16" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_S16" TARGET="_parent">SAMPLE_S16</TD><TD ALIGN="right" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_S16" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_S16" TARGET="_parent">2</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_U16" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_U16" TARGET="_parent">SAMPLE_U16</TD><TD ALIGN="right" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_U16" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_U16" TARGET="_parent">4</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_S24" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_S24" TARGET="_parent">SAMPLE_S24</TD><TD ALIGN="right" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_S24" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_S24" TARGET="_parent">8</TD></TR><TR><TD ALIGN="left" 
HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_U24" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_U24" TARGET="_parent">SAMPLE_U24</TD><TD ALIGN="right" HREF="../generated/stable/package-rst-audition.html#rst.audition.SoundChunk.SampleType.SAMPLE_U24" TITLE="Value rst.audition.SoundChunk.SampleType.SAMPLE_U24" TARGET="_parent">16</TD></TR></TABLE>>,shape=box,style=filled,fillcolor="white"]; "2" [label=<<TABLE BORDER="0"><TR><TD COLSPAN="2" HREF="../generated/sandbox/package-rst-audition.html#rst.audition.PhonemeCollection" TITLE="Structure rst.audition.PhonemeCollection" TARGET="_parent"><TABLE BORDER="0"><TR><TD ALIGN="right"><IMG SRC="../_static/message-sandbox.svg"></IMG></TD><TD ALIGN="left">PhonemeCollection</TD></TR></TABLE></TD></TR><TR><TD ALIGN="left" HREF="../generated/sandbox/package-rst-audition.html#rst.audition.Phoneme" TITLE="Structure rst.audition.Phoneme" TARGET="_parent">Phoneme</TD><TD ALIGN="left" HREF="../generated/sandbox/package-rst-audition.html#rst.audition.PhonemeCollection.element" TITLE="Field rst.audition.PhonemeCollection.element" TARGET="_parent" PORT="element">element</TD></TR></TABLE>>,shape=box,style=filled,fillcolor="white"]; "3" [label=<<TABLE BORDER="0"><TR><TD COLSPAN="2" HREF="../generated/sandbox/package-rst-audition.html#rst.audition.Phoneme" TITLE="Structure rst.audition.Phoneme" TARGET="_parent"><TABLE BORDER="0"><TR><TD ALIGN="right"><IMG SRC="../_static/message-sandbox.svg"></IMG></TD><TD ALIGN="left">Phoneme</TD></TR></TABLE></TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-__rosetta-ASCII-STRING.html#ASCII-STRING" TITLE="Fundamental ASCII-STRING" TARGET="_parent">ASCII-STRING</TD><TD ALIGN="left" HREF="../generated/sandbox/package-rst-audition.html#rst.audition.Phoneme.symbol" TITLE="Field rst.audition.Phoneme.symbol" TARGET="_parent" PORT="symbol">symbol</TD></TR><TR><TD ALIGN="left" HREF="../generated/stable/package-__rosetta-UINT32.html#UINT32" 
TITLE="Fundamental UINT32" TARGET="_parent">UINT32</TD><TD ALIGN="left" HREF="../generated/sandbox/package-rst-audition.html#rst.audition.Phoneme.duration" TITLE="Field rst.audition.Phoneme.duration" TARGET="_parent" PORT="duration">duration</TD></TR></TABLE>>,shape=box,style=filled,fillcolor="white"]; "1":audio -> "4" []; "1":phonemes -> "2" []; "4" -> "6" []; "4" -> "5" []; "4":endianness -> "6" []; "4":sample_type -> "5" []; "2":element -> "3" []; }

clearer: should be made invisible via css

Message Utterance

class rst.audition.Utterance

Objects of this type represent a single utterance of speech.

The data describes a single utterance in three different forms:

  • phonemes describes the utterance as a list of phone symbols and durations (useful e.g. for lip animation).
  • audio is a SoundChunk that can be played back on audio devices, containing the realization (e.g. by a TTS system) of the included phoneme list.
  • textual_representation is a textual description of the utterance for debugging purposes.

Code author: Simon Schulz <sschulz@techfak.uni-bielefeld.de>

phonemes
Type:rst.audition.PhonemeCollection

A collection of phonemes. Will be played back in the same ordering as given by Phoneme

audio
Type:rst.audition.SoundChunk

A chunk of audio data that can be played back containing the realization (e.g. by a TTS system) of the included phoneme list

textual_representation
Type:ASCII-STRING

Textual representation of the utterance.

Download this file

message Utterance {

    /**
     * The utterance as a collection of phonemes, i.e. phone symbols
     * with durations (useful e.g. for lip animation). Will be played
     * back in the same ordering as given by @ref .Phoneme
     */
    required PhonemeCollection phonemes = 1;

    /**
     * A chunk of audio data that can be played back on audio devices.
     * It contains the realization (e.g. by a TTS system) of the
     * included phoneme list.
     */
    required SoundChunk audio = 2;

    /**
     * Textual representation of the utterance, e.g. for debugging
     * purposes.
     */
    required string textual_representation = 3;

}

Message SoundChunk

class rst.audition.SoundChunk

Constraint: len(.data) == 8 * .channels * .sample_count * TODO(.sample_type)

Objects of this type represent a chunk of an audio stream.

The audio information for one or more channels is stored in data as a sequence of sample_count encoded samples, the encoding of which is described by endianness and sample_type.

Depending on the sample rate (rate), such a chunk of audio corresponds to a certain amount of time during which its samples have been recorded.

Interpretation of RSB timestamps:

create:
Capture time of the audio buffer. More precisely, the timestamp should correspond to the first sample contained in the buffer.

Code author: David Klotz <dklotz@techfak.uni-bielefeld.de>

@create_collection

data
Type:OCTET-VECTOR

The sequence of bytes representing the samples of this sound chunk.

The value of this field must be interpreted according to the values of the sample_count, channels, sample_type and endianness fields.

sample_count
Type:UINT32

Unit: number

The number of samples contained in data.

channels
Type:UINT32

Unit: number

The number of channels for which samples are stored in data.

rate
Type:UINT32

Unit: hz

The rate with which the samples stored in data have been recorded or should be played.

sample_type
Type:rst.audition.SoundChunk.SampleType

The data type used for the representation of samples in data.

endianness
Type:rst.audition.SoundChunk.EndianNess

The Endianness used for the representation of samples in data.

Download this file

message SoundChunk {

    /**
     * The possible data types for representing individual samples.
     *
     * Note that the numeric values are not consecutive (0, 1, 2, 4,
     * 8, 16). They identify the values on the wire and therefore
     * must not be changed.
     */
    enum SampleType {

        /**
         * Each sample is a signed 8-bit integer.
         */
        SAMPLE_S8 = 0;

        /**
         * Each sample is an unsigned 8-bit integer.
         */
        SAMPLE_U8 = 1;

        /**
         * Each sample is a signed 16-bit integer.
         */
        SAMPLE_S16 = 2;

        /**
         * Each sample is an unsigned 16-bit integer.
         */
        SAMPLE_U16 = 4;

        /**
         * Each sample is a signed 24-bit integer.
         */
        SAMPLE_S24 = 8;

        /**
         * Each sample is an unsigned 24-bit integer.
         */
        SAMPLE_U24 = 16;

    }

    /**
     * The possible byte orders for representing samples.
     */
    enum EndianNess {

        /**
         * Samples are represented with little-endian byte order.
         */
        ENDIAN_LITTLE = 0;

        /**
         * Samples are represented with big-endian byte order.
         */
        ENDIAN_BIG = 1;
    }

    /**
     * The sequence of bytes representing the samples of this sound
     * chunk.
     *
     * The value of this field must be interpreted according to the
     * values of the @ref .sample_count, @ref .channels, @ref
     * .sample_type and @ref .endianness fields.
     */
    required bytes data = 1;

    /**
     * The number of samples contained in @ref .data.
     */
    // @unit(number)
    required uint32 sample_count = 2;

    /**
     * The number of channels for which samples are stored in @ref
     * .data. Defaults to 1 (a single channel) when not set.
     */
    // @unit(number)
    optional uint32 channels = 3 [default = 1];

    /**
     * The rate (in Hz) with which the samples stored in @ref .data
     * have been recorded or should be played. Defaults to 44100 when
     * not set.
     */
    // @unit(hz)
    optional uint32 rate = 4 [default = 44100];

    /**
     * The data type used for the representation of samples in @ref
     * .data. Defaults to signed 16-bit samples when not set.
     */
    optional SampleType sample_type = 5 [default = SAMPLE_S16];

    /**
     * The endianness used for the representation of samples in @ref
     * .data. Defaults to little-endian when not set.
     */
    optional EndianNess endianness = 6 [default = ENDIAN_LITTLE];

    // TODO: interleaving type? (No field currently describes how the
    // samples of multiple channels are arranged within data.)

}

Message SampleType

class rst.audition.SoundChunk.SampleType

The possible data types for representing individual samples.

SAMPLE_S8
= 0

Signed 8-bit samples.

SAMPLE_U8
= 1

Unsigned 8-bit samples.

SAMPLE_S16
= 2

Signed 16-bit samples.

SAMPLE_U16
= 4

Unsigned 16-bit samples.

SAMPLE_S24
= 8

Signed 24-bit samples.

SAMPLE_U24
= 16

Unsigned 24-bit samples.

Download this file

    /**
     * The possible data types for representing individual samples.
     *
     * Note that the numeric values are not consecutive (0, 1, 2, 4,
     * 8, 16). They identify the values on the wire and therefore
     * must not be changed.
     */
    enum SampleType {

        /**
         * Each sample is a signed 8-bit integer.
         */
        SAMPLE_S8 = 0;

        /**
         * Each sample is an unsigned 8-bit integer.
         */
        SAMPLE_U8 = 1;

        /**
         * Each sample is a signed 16-bit integer.
         */
        SAMPLE_S16 = 2;

        /**
         * Each sample is an unsigned 16-bit integer.
         */
        SAMPLE_U16 = 4;

        /**
         * Each sample is a signed 24-bit integer.
         */
        SAMPLE_S24 = 8;

        /**
         * Each sample is an unsigned 24-bit integer.
         */
        SAMPLE_U24 = 16;

    }

Message EndianNess

class rst.audition.SoundChunk.EndianNess

The possible byte-orders for representing samples.

ENDIAN_LITTLE
= 0

Samples are represented with little Endian byte-order.

ENDIAN_BIG
= 1

Samples are represented with big Endian byte-order.

Download this file

    /**
     * The possible byte orders for representing samples.
     */
    enum EndianNess {

        /**
         * Samples are represented with little-endian byte order.
         */
        ENDIAN_LITTLE = 0;

        /**
         * Samples are represented with big-endian byte order.
         */
        ENDIAN_BIG = 1;
    }

Message PhonemeCollection

class rst.audition.PhonemeCollection

Collection of Phoneme instances.

Auto-generated.

element
Type:array of rst.audition.Phoneme

The individual elements of the collection.

Constraints regarding the empty collection, sorting, duplicated entries etc. are use case specific.

Download this file

message PhonemeCollection {

    /**
     * The individual Phoneme elements of the collection.
     *
     * Constraints regarding the empty collection, sorting, duplicated
     * entries etc. are use-case specific.
     */
    repeated Phoneme element = 1;

}

Message Phoneme

class rst.audition.Phoneme

Objects of this type represent a single phoneme-duration pair.

A list of elements of this type can be used to describe words or whole sentences of speech.

Code author: Simon Schulz <sschulz@techfak.uni-bielefeld.de>

@create_collection

symbol
Type:ASCII-STRING

A single phone symbol (such as aI, E, C, R, _, ...).

For examples see, e.g., https://en.wikipedia.org/wiki/Phoneme
or http://www.phon.ucl.ac.uk/home/sampa/german.htm (German).

duration
Type:UINT32

Unit: millisecond

The duration of this symbol.

Download this file

message Phoneme {

    /**
     * A single phone symbol (such as aI, E, C, R, _, ...).
     *
     * For examples see, e.g., https://en.wikipedia.org/wiki/Phoneme
     * or http://www.phon.ucl.ac.uk/home/sampa/german.htm (German).
     */
    required string symbol = 1;

    /**
     * The duration of this symbol in milliseconds.
     */
    // @unit(millisecond)
    required uint32 duration = 2;

}