Merge change 3344 into donut

* changes: Updating TtsEngine.h and SynthProxy.cpp so that buffer memory management is handled on the system side.
2009-06-05 14:11:08 -07:00
parent d299b8194d 83e712a142
commit 327da854e8
2 changed files with 52 additions and 26 deletions
--- a/include/tts/TtsEngine.h
+++ b/include/tts/TtsEngine.h
@@ -25,24 +25,29 @@

 namespace android {

+enum tts_synth_status {  // synthesis status the engine passes to synthDoneCB_t
+    TTS_SYNTH_DONE              = 0,  // synthesis is done
+    TTS_SYNTH_PENDING           = 1   // more data remains to be synthesized
+};
+
+enum tts_callback_status {  // consumer status returned by synthDoneCB_t
+    TTS_CALLBACK_HALT           = 0,  // consumer asks to stop
+    TTS_CALLBACK_CONTINUE       = 1   // consumer asks to continue
+};
+
 // The callback is used by the implementation of this interface to notify its
 // client, the Android TTS service, that the last requested synthesis has been
-// completed.
+// completed. // TODO reword
 // The callback for synthesis completed takes:
-//    void *       - The userdata pointer set in the original synth call
-//    uint32_t     - Track sampling rate in Hz
-//    audio_format - The AudioSystem::audio_format enum
-//    int          - The number of channels
-//    int8_t *     - A buffer of audio data only valid during the execution of the callback
-//    size_t       - The size of the buffer
-// Note about memory management:
-//    The implementation of TtsEngine is responsible for the management of the memory
-//    it allocates to store the synthesized speech. After the execution of the callback
-//    to hand the synthesized data to the client of TtsEngine, the TTS engine is
-//    free to reuse or free the previously allocated memory.
-//    This implies that the implementation of the "synthDoneCB" callback cannot use
-//    the pointer to the buffer of audio samples outside of the callback itself.
-typedef void (synthDoneCB_t)(void *, uint32_t, AudioSystem::audio_format, int, int8_t *, size_t);
+//    [inout] void *&      - The userdata pointer set in the original synth call
+//    [in]    uint32_t     - Track sampling rate in Hz
+//    [in]    audio_format - The AudioSystem::audio_format enum
+//    [in]    int          - The number of channels
+//    [inout] int8_t *&    - A buffer of audio data only valid during the execution of the callback
+//    [inout] size_t &     - The size of the buffer
+//    [in]    tts_synth_status - Status of the synthesis: TTS_SYNTH_DONE (0) when done, TTS_SYNTH_PENDING (1) when more data is to be synthesized.
+// Returns the status of the consumer of the synthesis: TTS_CALLBACK_HALT (0) to stop, TTS_CALLBACK_CONTINUE (1) to continue.
+typedef tts_callback_status (synthDoneCB_t)(void *&, uint32_t, AudioSystem::audio_format, int, int8_t *&, size_t&, tts_synth_status);

 class TtsEngine;
 extern "C" TtsEngine* getTtsEngine();
@@ -155,13 +160,13 @@ public:
    // @param text      the UTF-8 text to synthesize
    // @param userdata  pointer to be returned when the call is invoked
    // @return          TTS_SUCCESS or TTS_FAILURE
-    virtual tts_result synthesizeText(const char *text, void *userdata);
+    virtual tts_result synthesizeText(const char *text, int8_t *buffer, size_t bufferSize, void *userdata);

    // Synthesize IPA text. When synthesis completes, the engine must call the given callback to notify the TTS API.
    // @param ipa      the IPA data to synthesize
    // @param userdata  pointer to be returned when the call is invoked
    // @return TTS_FEATURE_UNSUPPORTED if IPA is not supported, otherwise TTS_SUCCESS or TTS_FAILURE
-    virtual tts_result synthesizeIpa(const char *ipa, void *userdata);
+    virtual tts_result synthesizeIpa(const char *ipa, int8_t *buffer, size_t bufferSize, void *userdata);
 };

 } // namespace android
--- a/tts/jni/android_tts_SynthProxy.cpp
+++ b/tts/jni/android_tts_SynthProxy.cpp
@@ -32,6 +32,7 @@
 #define DEFAULT_TTS_RATE        16000
 #define DEFAULT_TTS_FORMAT      AudioSystem::PCM_16_BIT
 #define DEFAULT_TTS_NB_CHANNELS 1
+#define DEFAULT_TTS_BUFFERSIZE  1024

 #define USAGEMODE_PLAY_IMMEDIATELY 0
 #define USAGEMODE_WRITE_TO_FILE    1
@@ -64,6 +65,8 @@ class SynthProxyJniStorage {
        uint32_t                  mSampleRate;
        AudioSystem::audio_format mAudFormat;
        int                       mNbChannels;
+        int8_t *                  mBuffer;
+        size_t                    mBufferSize;

        SynthProxyJniStorage() {
            //tts_class = NULL;
@@ -73,6 +76,8 @@ class SynthProxyJniStorage {
            mSampleRate = DEFAULT_TTS_RATE;
            mAudFormat  = DEFAULT_TTS_FORMAT;
            mNbChannels = DEFAULT_TTS_NB_CHANNELS;
+            mBufferSize = DEFAULT_TTS_BUFFERSIZE;
+            mBuffer = new int8_t[mBufferSize];
        }

        ~SynthProxyJniStorage() {
@@ -81,6 +86,7 @@ class SynthProxyJniStorage {
                mNativeSynthInterface->shutdown();
                mNativeSynthInterface = NULL;
            }
+            delete[] mBuffer; // mBuffer is allocated with new[] in the constructor
        }

        void killAudio() {
@@ -159,23 +165,27 @@ void prepAudioTrack(SynthProxyJniStorage* pJniData,
 * Callback from TTS engine.
 * Directly speaks using AudioTrack or write to file
 */
-static void ttsSynthDoneCB(void * userdata, uint32_t rate,
+static tts_callback_status ttsSynthDoneCB(void *& userdata, uint32_t rate,
                           AudioSystem::audio_format format, int channel,
-                           int8_t *wav, size_t bufferSize) {
+                           int8_t *&wav, size_t &bufferSize, tts_synth_status status) {
    LOGI("ttsSynthDoneCallback: %d bytes", bufferSize);

+    if (userdata == NULL){
+        LOGE("userdata == NULL");
+        return TTS_CALLBACK_HALT;
+    }
    afterSynthData_t* pForAfter = (afterSynthData_t*)userdata;
+    SynthProxyJniStorage* pJniData = (SynthProxyJniStorage*)(pForAfter->jniStorage);

    if (pForAfter->usageMode == USAGEMODE_PLAY_IMMEDIATELY){
        LOGI("Direct speech");

        if (wav == NULL) {
+            delete pForAfter;
            LOGI("Null: speech has completed");
        }

        if (bufferSize > 0) {
-            SynthProxyJniStorage* pJniData =
-                    (SynthProxyJniStorage*)(pForAfter->jniStorage);
            prepAudioTrack(pJniData, rate, format, channel);
            if (pJniData->mAudioOut) {
                pJniData->mAudioOut->write(wav, bufferSize);
@@ -187,6 +197,7 @@ static void ttsSynthDoneCB(void * userdata, uint32_t rate,
    } else  if (pForAfter->usageMode == USAGEMODE_WRITE_TO_FILE) {
        LOGI("Save to file");
        if (wav == NULL) {
+            delete pForAfter;
            LOGI("Null: speech has completed");
        }
        if (bufferSize > 0){
@@ -195,10 +206,17 @@ static void ttsSynthDoneCB(void * userdata, uint32_t rate,
    }
    // TODO update to call back into the SynthProxy class through the
    //      javaTTSFields.synthProxyMethodPost methode to notify
-    //      playback has completed
+    //      playback has completed if the synthesis is done, i.e.
+    //      if status == TTS_SYNTH_DONE
+    //delete pForAfter;

-    delete pForAfter;
-    return;
+    // We don't update the wav (output) parameter: its data has already been
+    // consumed, so we let the next callback write at the same location. We do
+    // need to update bufferSize, to let the TTS engine know how much it can
+    // write the next time it calls this function.
+    bufferSize = pJniData->mBufferSize;
+
+    return TTS_CALLBACK_CONTINUE;
 }


@@ -223,7 +241,9 @@ android_tts_SynthProxy_native_setup(JNIEnv *env, jobject thiz,
    } else {
        TtsEngine *(*get_TtsEngine)() =
            reinterpret_cast<TtsEngine* (*)()>(dlsym(engine_lib_handle, "getTtsEngine"));
+
        pJniStorage->mNativeSynthInterface = (*get_TtsEngine)();
+
        if (pJniStorage->mNativeSynthInterface) {
            pJniStorage->mNativeSynthInterface->init(ttsSynthDoneCB);
        }
@@ -323,7 +343,7 @@ android_tts_SynthProxy_synthesizeToFile(JNIEnv *env, jobject thiz, jint jniData,

    // TODO check return codes
    if (pSynthData->mNativeSynthInterface) {
-        pSynthData->mNativeSynthInterface->synthesizeText(textNativeString,
+        pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, pSynthData->mBuffer, pSynthData->mBufferSize,
                (void *)pForAfter);
    }

@@ -395,7 +415,7 @@ android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData,

    if (pSynthData->mNativeSynthInterface) {
        const char *textNativeString = env->GetStringUTFChars(textJavaString, 0);
-        pSynthData->mNativeSynthInterface->synthesizeText(textNativeString,
+        pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, pSynthData->mBuffer, pSynthData->mBufferSize,
                (void *)pForAfter);
        env->ReleaseStringUTFChars(textJavaString, textNativeString);
    }
@@ -442,6 +462,7 @@ static void
 android_tts_SynthProxy_playAudioBuffer(JNIEnv *env, jobject thiz, jint jniData,
        int bufferPointer, int bufferSize)
 {
+LOGI("android_tts_SynthProxy_playAudioBuffer");
    if (jniData == 0) {
        LOGE("android_tts_SynthProxy_playAudioBuffer(): invalid JNI data");
        return;