992 lines
31 KiB
C++
992 lines
31 KiB
C++
/*
|
|
* Copyright (C) 2009 Google Inc.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <unistd.h>
|
|
|
|
#define LOG_TAG "SynthProxy"
|
|
|
|
#include <utils/Log.h>
|
|
#include <nativehelper/jni.h>
|
|
#include <nativehelper/JNIHelp.h>
|
|
#include <android_runtime/AndroidRuntime.h>
|
|
#include <tts/TtsEngine.h>
|
|
#include <media/AudioTrack.h>
|
|
#include <math.h>
|
|
|
|
#include <dlfcn.h>
|
|
|
|
#define DEFAULT_TTS_RATE 16000
|
|
#define DEFAULT_TTS_FORMAT AudioSystem::PCM_16_BIT
|
|
#define DEFAULT_TTS_NB_CHANNELS 1
|
|
#define DEFAULT_TTS_BUFFERSIZE 2048
|
|
// TODO use the TTS stream type when available
|
|
#define DEFAULT_TTS_STREAM_TYPE AudioSystem::MUSIC
|
|
|
|
// EQ + BOOST parameters
|
|
#define FILTER_LOWSHELF_ATTENUATION -18.0f // in dB
|
|
#define FILTER_TRANSITION_FREQ 1100.0f // in Hz
|
|
#define FILTER_SHELF_SLOPE 1.0f // Q
|
|
#define FILTER_GAIN 5.5f // linear gain
|
|
|
|
#define USAGEMODE_PLAY_IMMEDIATELY 0
|
|
#define USAGEMODE_WRITE_TO_FILE 1
|
|
|
|
#define SYNTHPLAYSTATE_IS_STOPPED 0
|
|
#define SYNTHPLAYSTATE_IS_PLAYING 1
|
|
|
|
using namespace android;
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// Cached JNI identifiers for the Java android.tts.SynthProxy class, resolved
// once in JNI_OnLoad so native code can reach back into Java without
// repeating the (relatively expensive) lookups.
struct fields_t {
    // Java int field ("mJniData") that stores the native SynthProxyJniStorage*.
    jfieldID    synthProxyFieldJniData;
    // The SynthProxy class itself.
    jclass      synthProxyClass;
    // Static Java method used to post synthesis notifications back to Java.
    jmethodID   synthProxyMethodPost;
};
|
|
|
|
// structure to hold the data that is used each time the TTS engine has synthesized more data
|
|
// structure to hold the data that is used each time the TTS engine has synthesized more data
// It is allocated per synthesis request and handed to the engine as the opaque
// userdata pointer; ttsSynthDoneCB uses it to route audio to the right sink.
struct afterSynthData_t {
    jint usageMode;  // NOTE(review): see members below — jniStorage carries a pointer in a jint
    jint jniStorage;         // actually a SynthProxyJniStorage*, squeezed into a jint
    int usageMode;           // USAGEMODE_PLAY_IMMEDIATELY or USAGEMODE_WRITE_TO_FILE
    FILE* outputFile;        // destination when writing to file; unused for direct playback
    AudioSystem::stream_type streamType;  // output stream used for direct playback
};
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// EQ data
|
|
// EQ data
// Shared state of the single low-shelf biquad filter instance. Intermediate
// terms of the coefficient computation (see initializeEQ()):
double amp;     // shelf amplitude, derived from the attenuation in dB
double w;       // normalized angular transition frequency
double sinw;
double cosw;
double beta;    // shelf-slope term
double a0, a1, a2, b0, b1, b2;        // raw biquad coefficients
double m_fa, m_fb, m_fc, m_fd, m_fe;  // normalized coefficients used by applyFilter()
// Filter delay line (persists across applyFilter() calls, reset by
// initializeFilter()):
double x0; // x[n]
double x1; // x[n-1]
double x2; // x[n-2]
double out0;// y[n]
double out1;// y[n-1]
double out2;// y[n-2]

// Current EQ settings; updated from Java via native_setLowShelf.
static float fFilterLowshelfAttenuation = FILTER_LOWSHELF_ATTENUATION;
static float fFilterTransitionFreq = FILTER_TRANSITION_FREQ;
static float fFilterShelfSlope = FILTER_SHELF_SLOPE;
static float fFilterGain = FILTER_GAIN;
// Whether the EQ is applied to synthesized audio at all.
static bool bUseFilter = false;
|
|
|
|
// Computes the biquad coefficients of the low-shelf speech EQ from the
// current fFilter* settings, assuming audio sampled at DEFAULT_TTS_RATE.
// The formulas follow the usual audio-EQ low-shelf derivation; note that the
// signs of a1/a2 are folded into the coefficients here so that applyFilter()
// can accumulate every term with '+'.
void initializeEQ() {

    amp = float(pow(10.0, fFilterLowshelfAttenuation / 40.0));   // dB -> linear shelf amplitude
    w = 2.0 * M_PI * (fFilterTransitionFreq / DEFAULT_TTS_RATE); // normalized angular frequency
    sinw = float(sin(w));
    cosw = float(cos(w));
    beta = float(sqrt(amp)/fFilterShelfSlope);

    // initialize low-shelf parameters
    b0 = amp * ((amp+1.0F) - ((amp-1.0F)*cosw) + (beta*sinw));
    b1 = 2.0F * amp * ((amp-1.0F) - ((amp+1.0F)*cosw));
    b2 = amp * ((amp+1.0F) - ((amp-1.0F)*cosw) - (beta*sinw));
    a0 = (amp+1.0F) + ((amp-1.0F)*cosw) + (beta*sinw);
    a1 = 2.0F * ((amp-1.0F) + ((amp+1.0F)*cosw));
    a2 = -((amp+1.0F) + ((amp-1.0F)*cosw) - (beta*sinw));

    // normalize by a0 and bake in the output gain; these five values are the
    // only ones applyFilter() reads
    m_fa = fFilterGain * b0/a0;
    m_fb = fFilterGain * b1/a0;
    m_fc = fFilterGain * b2/a0;
    m_fd = a1/a0;
    m_fe = a2/a0;
}
|
|
|
|
// Clears the biquad delay line so a new utterance does not inherit filter
// state from the previous one.
void initializeFilter() {
    x0 = 0.0;
    x1 = 0.0;
    x2 = 0.0;
    out0 = 0.0;
    out1 = 0.0;
    out2 = 0.0;
}
|
|
|
|
void applyFilter(int16_t* buffer, size_t sampleCount) {
|
|
|
|
for (size_t i=0 ; i<sampleCount ; i++) {
|
|
|
|
x0 = (double) buffer[i];
|
|
|
|
out0 = (m_fa*x0) + (m_fb*x1) + (m_fc*x2) + (m_fd*out1) + (m_fe*out2);
|
|
|
|
x2 = x1;
|
|
x1 = x0;
|
|
|
|
out2 = out1;
|
|
out1 = out0;
|
|
|
|
if (out0 > 32767.0f) {
|
|
buffer[i] = 32767;
|
|
} else if (out0 < -32768.0f) {
|
|
buffer[i] = -32768;
|
|
} else {
|
|
buffer[i] = (int16_t) out0;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// Cached Java-side identifiers, filled in by JNI_OnLoad.
static fields_t javaTTSFields;

// TODO move to synth member once we have multiple simultaneous engines running
// Serializes access to the single shared TTS engine instance.
static Mutex engineMutex;
|
|
|
|
// ----------------------------------------------------------------------------
|
|
class SynthProxyJniStorage {
|
|
public :
|
|
jobject tts_ref;
|
|
TtsEngine* mNativeSynthInterface;
|
|
void* mEngineLibHandle;
|
|
AudioTrack* mAudioOut;
|
|
int8_t mPlayState;
|
|
Mutex mPlayLock;
|
|
AudioSystem::stream_type mStreamType;
|
|
uint32_t mSampleRate;
|
|
uint32_t mAudFormat;
|
|
int mNbChannels;
|
|
int8_t * mBuffer;
|
|
size_t mBufferSize;
|
|
|
|
SynthProxyJniStorage() {
|
|
tts_ref = NULL;
|
|
mNativeSynthInterface = NULL;
|
|
mEngineLibHandle = NULL;
|
|
mAudioOut = NULL;
|
|
mPlayState = SYNTHPLAYSTATE_IS_STOPPED;
|
|
mStreamType = DEFAULT_TTS_STREAM_TYPE;
|
|
mSampleRate = DEFAULT_TTS_RATE;
|
|
mAudFormat = DEFAULT_TTS_FORMAT;
|
|
mNbChannels = DEFAULT_TTS_NB_CHANNELS;
|
|
mBufferSize = DEFAULT_TTS_BUFFERSIZE;
|
|
mBuffer = new int8_t[mBufferSize];
|
|
memset(mBuffer, 0, mBufferSize);
|
|
}
|
|
|
|
~SynthProxyJniStorage() {
|
|
//LOGV("entering ~SynthProxyJniStorage()");
|
|
killAudio();
|
|
if (mNativeSynthInterface) {
|
|
mNativeSynthInterface->shutdown();
|
|
mNativeSynthInterface = NULL;
|
|
}
|
|
if (mEngineLibHandle) {
|
|
//LOGE("~SynthProxyJniStorage(): before close library");
|
|
int res = dlclose(mEngineLibHandle);
|
|
LOGE_IF( res != 0, "~SynthProxyJniStorage(): dlclose returned %d", res);
|
|
}
|
|
delete mBuffer;
|
|
}
|
|
|
|
void killAudio() {
|
|
if (mAudioOut) {
|
|
mAudioOut->stop();
|
|
delete mAudioOut;
|
|
mAudioOut = NULL;
|
|
}
|
|
}
|
|
|
|
void createAudioOut(AudioSystem::stream_type streamType, uint32_t rate,
|
|
AudioSystem::audio_format format, int channel) {
|
|
mSampleRate = rate;
|
|
mAudFormat = format;
|
|
mNbChannels = channel;
|
|
mStreamType = streamType;
|
|
|
|
// retrieve system properties to ensure successful creation of the
|
|
// AudioTrack object for playback
|
|
int afSampleRate;
|
|
if (AudioSystem::getOutputSamplingRate(&afSampleRate, mStreamType) != NO_ERROR) {
|
|
afSampleRate = 44100;
|
|
}
|
|
int afFrameCount;
|
|
if (AudioSystem::getOutputFrameCount(&afFrameCount, mStreamType) != NO_ERROR) {
|
|
afFrameCount = 2048;
|
|
}
|
|
uint32_t afLatency;
|
|
if (AudioSystem::getOutputLatency(&afLatency, mStreamType) != NO_ERROR) {
|
|
afLatency = 500;
|
|
}
|
|
uint32_t minBufCount = afLatency / ((1000 * afFrameCount)/afSampleRate);
|
|
if (minBufCount < 2) minBufCount = 2;
|
|
int minFrameCount = (afFrameCount * rate * minBufCount)/afSampleRate;
|
|
|
|
mPlayLock.lock();
|
|
mAudioOut = new AudioTrack(mStreamType, rate, format,
|
|
(channel == 2) ? AudioSystem::CHANNEL_OUT_STEREO : AudioSystem::CHANNEL_OUT_MONO,
|
|
minFrameCount > 4096 ? minFrameCount : 4096,
|
|
0, 0, 0, 0); // not using an AudioTrack callback
|
|
|
|
if (mAudioOut->initCheck() != NO_ERROR) {
|
|
LOGE("createAudioOut(): AudioTrack error");
|
|
delete mAudioOut;
|
|
mAudioOut = NULL;
|
|
} else {
|
|
//LOGI("AudioTrack OK");
|
|
mAudioOut->setVolume(1.0f, 1.0f);
|
|
LOGV("AudioTrack ready");
|
|
}
|
|
mPlayLock.unlock();
|
|
}
|
|
};
|
|
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// Ensures pJniData has an AudioTrack matching the given audio parameters.
// An existing track is reused when every parameter matches; otherwise it is
// torn down and a new one is created.
void prepAudioTrack(SynthProxyJniStorage* pJniData, AudioSystem::stream_type streamType,
        uint32_t rate, AudioSystem::audio_format format, int channel) {
    bool sameParams = (pJniData->mAudioOut != NULL)
            && (rate == pJniData->mSampleRate)
            && (format == pJniData->mAudFormat)
            && (channel == pJniData->mNbChannels)
            && (streamType == pJniData->mStreamType);
    if (sameParams) {
        return;
    }
    if (pJniData->mAudioOut != NULL) {
        pJniData->killAudio();
    }
    pJniData->createAudioOut(streamType, rate, format, channel);
}
|
|
|
|
|
|
// ----------------------------------------------------------------------------
|
|
/*
|
|
* Callback from TTS engine.
|
|
* Directly speaks using AudioTrack or write to file
|
|
*/
|
|
// Callback invoked by the TTS engine each time it has synthesized more audio.
// Depending on pForAfter->usageMode the audio is either played through the
// AudioTrack or appended to the output file. Returns TTS_CALLBACK_CONTINUE
// to request more data, TTS_CALLBACK_HALT when synthesis is complete.
static tts_callback_status ttsSynthDoneCB(void *& userdata, uint32_t rate,
        uint32_t format, int channel,
        int8_t *&wav, size_t &bufferSize, tts_synth_status status) {
    //LOGV("ttsSynthDoneCallback: %d bytes", bufferSize);

    if (userdata == NULL){
        LOGE("userdata == NULL");
        return TTS_CALLBACK_HALT;
    }
    afterSynthData_t* pForAfter = (afterSynthData_t*)userdata;
    SynthProxyJniStorage* pJniData = (SynthProxyJniStorage*)(pForAfter->jniStorage);

    if (pForAfter->usageMode == USAGEMODE_PLAY_IMMEDIATELY){
        //LOGV("Direct speech");

        if (wav == NULL) {
            delete pForAfter;
            LOGV("Null: speech has completed");
            // BUGFIX: halt here, mirroring the write-to-file branch below.
            // The original code fell through after deleting pForAfter and
            // later dereferenced it (and could delete it a second time) in
            // the TTS_SYNTH_DONE block — a use-after-free / double free.
            return TTS_CALLBACK_HALT;
        }

        if (bufferSize > 0) {
            prepAudioTrack(pJniData, pForAfter->streamType, rate, (AudioSystem::audio_format)format, channel);
            if (pJniData->mAudioOut) {
                // (re)start the track only if playback hasn't been stopped
                // from the Java side in the meantime
                pJniData->mPlayLock.lock();
                if(pJniData->mAudioOut->stopped()
                        && (pJniData->mPlayState == SYNTHPLAYSTATE_IS_PLAYING)) {
                    pJniData->mAudioOut->start();
                }
                pJniData->mPlayLock.unlock();
                if (bUseFilter) {
                    applyFilter((int16_t*)wav, bufferSize/2);
                }
                pJniData->mAudioOut->write(wav, bufferSize);
                memset(wav, 0, bufferSize);
                //LOGV("AudioTrack wrote: %d bytes", bufferSize);
            } else {
                LOGE("Can't play, null audiotrack");
            }
        }
    } else if (pForAfter->usageMode == USAGEMODE_WRITE_TO_FILE) {
        //LOGV("Save to file");
        if (wav == NULL) {
            delete pForAfter;
            LOGV("Null: speech has completed");
            return TTS_CALLBACK_HALT;
        }
        if (bufferSize > 0){
            if (bUseFilter) {
                applyFilter((int16_t*)wav, bufferSize/2);
            }
            fwrite(wav, 1, bufferSize, pForAfter->outputFile);
            memset(wav, 0, bufferSize);
        }
    }
    // Future update:
    // For sync points in the speech, call back into the SynthProxy class through the
    // javaTTSFields.synthProxyMethodPost methode to notify
    // playback has completed if the synthesis is done or if a marker has been reached.

    if (status == TTS_SYNTH_DONE) {
        // this struct was allocated in the original android_tts_SynthProxy_speak call,
        // all processing matching this call is now done.
        LOGV("Speech synthesis done.");
        if (pForAfter->usageMode == USAGEMODE_PLAY_IMMEDIATELY) {
            // only delete for direct playback. When writing to a file, we still have work to do
            // in android_tts_SynthProxy_synthesizeToFile. The struct will be deleted there.
            delete pForAfter;
            pForAfter = NULL;
        }
        return TTS_CALLBACK_HALT;
    }

    // we don't update the wav (output) parameter as we'll let the next callback
    // write at the same location, we've consumed the data already, but we need
    // to update bufferSize to let the TTS engine know how much it can write the
    // next time it calls this function.
    bufferSize = pJniData->mBufferSize;

    return TTS_CALLBACK_CONTINUE;
}
|
|
|
|
|
|
// ----------------------------------------------------------------------------
|
|
static int
|
|
android_tts_SynthProxy_setLowShelf(JNIEnv *env, jobject thiz, jboolean applyFilter,
|
|
jfloat filterGain, jfloat attenuationInDb, jfloat freqInHz, jfloat slope)
|
|
{
|
|
int result = TTS_SUCCESS;
|
|
|
|
bUseFilter = applyFilter;
|
|
if (applyFilter) {
|
|
fFilterLowshelfAttenuation = attenuationInDb;
|
|
fFilterTransitionFreq = freqInHz;
|
|
fFilterShelfSlope = slope;
|
|
fFilterGain = filterGain;
|
|
|
|
if (fFilterShelfSlope != 0.0f) {
|
|
initializeEQ();
|
|
} else {
|
|
LOGE("Invalid slope, can't be null");
|
|
result = TTS_FAILURE;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
static int
|
|
android_tts_SynthProxy_native_setup(JNIEnv *env, jobject thiz,
|
|
jobject weak_this, jstring nativeSoLib, jstring engConfig)
|
|
{
|
|
int result = TTS_FAILURE;
|
|
|
|
bUseFilter = false;
|
|
|
|
SynthProxyJniStorage* pJniStorage = new SynthProxyJniStorage();
|
|
|
|
prepAudioTrack(pJniStorage,
|
|
DEFAULT_TTS_STREAM_TYPE, DEFAULT_TTS_RATE, DEFAULT_TTS_FORMAT, DEFAULT_TTS_NB_CHANNELS);
|
|
|
|
const char *nativeSoLibNativeString = env->GetStringUTFChars(nativeSoLib, 0);
|
|
const char *engConfigString = env->GetStringUTFChars(engConfig, 0);
|
|
|
|
void *engine_lib_handle = dlopen(nativeSoLibNativeString,
|
|
RTLD_NOW | RTLD_LOCAL);
|
|
if (engine_lib_handle == NULL) {
|
|
LOGE("android_tts_SynthProxy_native_setup(): engine_lib_handle == NULL");
|
|
} else {
|
|
TtsEngine *(*get_TtsEngine)() =
|
|
reinterpret_cast<TtsEngine* (*)()>(dlsym(engine_lib_handle, "getTtsEngine"));
|
|
|
|
pJniStorage->mNativeSynthInterface = (*get_TtsEngine)();
|
|
pJniStorage->mEngineLibHandle = engine_lib_handle;
|
|
|
|
if (pJniStorage->mNativeSynthInterface) {
|
|
Mutex::Autolock l(engineMutex);
|
|
pJniStorage->mNativeSynthInterface->init(ttsSynthDoneCB, engConfigString);
|
|
}
|
|
|
|
result = TTS_SUCCESS;
|
|
}
|
|
|
|
// we use a weak reference so the SynthProxy object can be garbage collected.
|
|
pJniStorage->tts_ref = env->NewGlobalRef(weak_this);
|
|
|
|
// save the JNI resources so we can use them (and free them) later
|
|
env->SetIntField(thiz, javaTTSFields.synthProxyFieldJniData, (int)pJniStorage);
|
|
|
|
env->ReleaseStringUTFChars(nativeSoLib, nativeSoLibNativeString);
|
|
env->ReleaseStringUTFChars(engConfig, engConfigString);
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
static void
|
|
android_tts_SynthProxy_native_finalize(JNIEnv *env, jobject thiz, jint jniData)
|
|
{
|
|
//LOGV("entering android_tts_SynthProxy_finalize()");
|
|
if (jniData == 0) {
|
|
//LOGE("android_tts_SynthProxy_native_finalize(): invalid JNI data");
|
|
return;
|
|
}
|
|
|
|
Mutex::Autolock l(engineMutex);
|
|
|
|
SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
|
|
env->DeleteGlobalRef(pSynthData->tts_ref);
|
|
delete pSynthData;
|
|
|
|
env->SetIntField(thiz, javaTTSFields.synthProxyFieldJniData, 0);
|
|
}
|
|
|
|
|
|
static void
|
|
android_tts_SynthProxy_shutdown(JNIEnv *env, jobject thiz, jint jniData)
|
|
{
|
|
//LOGV("entering android_tts_SynthProxy_shutdown()");
|
|
|
|
// do everything a call to finalize would
|
|
android_tts_SynthProxy_native_finalize(env, thiz, jniData);
|
|
}
|
|
|
|
|
|
static int
|
|
android_tts_SynthProxy_isLanguageAvailable(JNIEnv *env, jobject thiz, jint jniData,
|
|
jstring language, jstring country, jstring variant)
|
|
{
|
|
int result = TTS_LANG_NOT_SUPPORTED;
|
|
|
|
if (jniData == 0) {
|
|
LOGE("android_tts_SynthProxy_isLanguageAvailable(): invalid JNI data");
|
|
return result;
|
|
}
|
|
|
|
SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
|
|
const char *langNativeString = env->GetStringUTFChars(language, 0);
|
|
const char *countryNativeString = env->GetStringUTFChars(country, 0);
|
|
const char *variantNativeString = env->GetStringUTFChars(variant, 0);
|
|
|
|
if (pSynthData->mNativeSynthInterface) {
|
|
result = pSynthData->mNativeSynthInterface->isLanguageAvailable(langNativeString,
|
|
countryNativeString, variantNativeString);
|
|
}
|
|
env->ReleaseStringUTFChars(language, langNativeString);
|
|
env->ReleaseStringUTFChars(country, countryNativeString);
|
|
env->ReleaseStringUTFChars(variant, variantNativeString);
|
|
return result;
|
|
}
|
|
|
|
static int
|
|
android_tts_SynthProxy_setConfig(JNIEnv *env, jobject thiz, jint jniData, jstring engineConfig)
|
|
{
|
|
int result = TTS_FAILURE;
|
|
|
|
if (jniData == 0) {
|
|
LOGE("android_tts_SynthProxy_setConfig(): invalid JNI data");
|
|
return result;
|
|
}
|
|
|
|
Mutex::Autolock l(engineMutex);
|
|
|
|
SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
|
|
const char *engineConfigNativeString = env->GetStringUTFChars(engineConfig, 0);
|
|
|
|
if (pSynthData->mNativeSynthInterface) {
|
|
result = pSynthData->mNativeSynthInterface->setProperty(ANDROID_TTS_ENGINE_PROPERTY_CONFIG,
|
|
engineConfigNativeString, strlen(engineConfigNativeString));
|
|
}
|
|
env->ReleaseStringUTFChars(engineConfig, engineConfigNativeString);
|
|
|
|
return result;
|
|
}
|
|
|
|
static int
|
|
android_tts_SynthProxy_setLanguage(JNIEnv *env, jobject thiz, jint jniData,
|
|
jstring language, jstring country, jstring variant)
|
|
{
|
|
int result = TTS_LANG_NOT_SUPPORTED;
|
|
|
|
if (jniData == 0) {
|
|
LOGE("android_tts_SynthProxy_setLanguage(): invalid JNI data");
|
|
return result;
|
|
}
|
|
|
|
Mutex::Autolock l(engineMutex);
|
|
|
|
SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
|
|
const char *langNativeString = env->GetStringUTFChars(language, 0);
|
|
const char *countryNativeString = env->GetStringUTFChars(country, 0);
|
|
const char *variantNativeString = env->GetStringUTFChars(variant, 0);
|
|
|
|
if (pSynthData->mNativeSynthInterface) {
|
|
result = pSynthData->mNativeSynthInterface->setLanguage(langNativeString,
|
|
countryNativeString, variantNativeString);
|
|
}
|
|
env->ReleaseStringUTFChars(language, langNativeString);
|
|
env->ReleaseStringUTFChars(country, countryNativeString);
|
|
env->ReleaseStringUTFChars(variant, variantNativeString);
|
|
return result;
|
|
}
|
|
|
|
|
|
static int
|
|
android_tts_SynthProxy_loadLanguage(JNIEnv *env, jobject thiz, jint jniData,
|
|
jstring language, jstring country, jstring variant)
|
|
{
|
|
int result = TTS_LANG_NOT_SUPPORTED;
|
|
|
|
if (jniData == 0) {
|
|
LOGE("android_tts_SynthProxy_loadLanguage(): invalid JNI data");
|
|
return result;
|
|
}
|
|
|
|
SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
|
|
const char *langNativeString = env->GetStringUTFChars(language, 0);
|
|
const char *countryNativeString = env->GetStringUTFChars(country, 0);
|
|
const char *variantNativeString = env->GetStringUTFChars(variant, 0);
|
|
|
|
if (pSynthData->mNativeSynthInterface) {
|
|
result = pSynthData->mNativeSynthInterface->loadLanguage(langNativeString,
|
|
countryNativeString, variantNativeString);
|
|
}
|
|
env->ReleaseStringUTFChars(language, langNativeString);
|
|
env->ReleaseStringUTFChars(country, countryNativeString);
|
|
env->ReleaseStringUTFChars(variant, variantNativeString);
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
static int
|
|
android_tts_SynthProxy_setSpeechRate(JNIEnv *env, jobject thiz, jint jniData,
|
|
jint speechRate)
|
|
{
|
|
int result = TTS_FAILURE;
|
|
|
|
if (jniData == 0) {
|
|
LOGE("android_tts_SynthProxy_setSpeechRate(): invalid JNI data");
|
|
return result;
|
|
}
|
|
|
|
int bufSize = 12;
|
|
char buffer [bufSize];
|
|
sprintf(buffer, "%d", speechRate);
|
|
|
|
Mutex::Autolock l(engineMutex);
|
|
|
|
SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
|
|
LOGI("setting speech rate to %d", speechRate);
|
|
|
|
if (pSynthData->mNativeSynthInterface) {
|
|
result = pSynthData->mNativeSynthInterface->setProperty("rate", buffer, bufSize);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
static int
|
|
android_tts_SynthProxy_setPitch(JNIEnv *env, jobject thiz, jint jniData,
|
|
jint pitch)
|
|
{
|
|
int result = TTS_FAILURE;
|
|
|
|
if (jniData == 0) {
|
|
LOGE("android_tts_SynthProxy_setPitch(): invalid JNI data");
|
|
return result;
|
|
}
|
|
|
|
Mutex::Autolock l(engineMutex);
|
|
|
|
int bufSize = 12;
|
|
char buffer [bufSize];
|
|
sprintf(buffer, "%d", pitch);
|
|
|
|
SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
|
|
LOGI("setting pitch to %d", pitch);
|
|
|
|
if (pSynthData->mNativeSynthInterface) {
|
|
result = pSynthData->mNativeSynthInterface->setProperty("pitch", buffer, bufSize);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
static int
|
|
android_tts_SynthProxy_synthesizeToFile(JNIEnv *env, jobject thiz, jint jniData,
|
|
jstring textJavaString, jstring filenameJavaString)
|
|
{
|
|
int result = TTS_FAILURE;
|
|
|
|
if (jniData == 0) {
|
|
LOGE("android_tts_SynthProxy_synthesizeToFile(): invalid JNI data");
|
|
return result;
|
|
}
|
|
|
|
SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
|
|
if (!pSynthData->mNativeSynthInterface) {
|
|
LOGE("android_tts_SynthProxy_synthesizeToFile(): invalid engine handle");
|
|
return result;
|
|
}
|
|
|
|
initializeFilter();
|
|
|
|
Mutex::Autolock l(engineMutex);
|
|
|
|
// Retrieve audio parameters before writing the file header
|
|
AudioSystem::audio_format encoding = DEFAULT_TTS_FORMAT;
|
|
uint32_t rate = DEFAULT_TTS_RATE;
|
|
int channels = DEFAULT_TTS_NB_CHANNELS;
|
|
pSynthData->mNativeSynthInterface->setAudioFormat(encoding, rate, channels);
|
|
|
|
if ((encoding != AudioSystem::PCM_16_BIT) && (encoding != AudioSystem::PCM_8_BIT)) {
|
|
LOGE("android_tts_SynthProxy_synthesizeToFile(): engine uses invalid format");
|
|
return result;
|
|
}
|
|
|
|
const char *filenameNativeString =
|
|
env->GetStringUTFChars(filenameJavaString, 0);
|
|
const char *textNativeString = env->GetStringUTFChars(textJavaString, 0);
|
|
|
|
afterSynthData_t* pForAfter = new (afterSynthData_t);
|
|
pForAfter->jniStorage = jniData;
|
|
pForAfter->usageMode = USAGEMODE_WRITE_TO_FILE;
|
|
|
|
pForAfter->outputFile = fopen(filenameNativeString, "wb");
|
|
|
|
if (pForAfter->outputFile == NULL) {
|
|
LOGE("android_tts_SynthProxy_synthesizeToFile(): error creating output file");
|
|
delete pForAfter;
|
|
return result;
|
|
}
|
|
|
|
// Write 44 blank bytes for WAV header, then come back and fill them in
|
|
// after we've written the audio data
|
|
char header[44];
|
|
fwrite(header, 1, 44, pForAfter->outputFile);
|
|
|
|
unsigned int unique_identifier;
|
|
|
|
memset(pSynthData->mBuffer, 0, pSynthData->mBufferSize);
|
|
result = pSynthData->mNativeSynthInterface->synthesizeText(textNativeString,
|
|
pSynthData->mBuffer, pSynthData->mBufferSize, (void *)pForAfter);
|
|
|
|
long filelen = ftell(pForAfter->outputFile);
|
|
|
|
int samples = (((int)filelen) - 44) / 2;
|
|
header[0] = 'R';
|
|
header[1] = 'I';
|
|
header[2] = 'F';
|
|
header[3] = 'F';
|
|
((uint32_t *)(&header[4]))[0] = filelen - 8;
|
|
header[8] = 'W';
|
|
header[9] = 'A';
|
|
header[10] = 'V';
|
|
header[11] = 'E';
|
|
|
|
header[12] = 'f';
|
|
header[13] = 'm';
|
|
header[14] = 't';
|
|
header[15] = ' ';
|
|
|
|
((uint32_t *)(&header[16]))[0] = 16; // size of fmt
|
|
|
|
int sampleSizeInByte = (encoding == AudioSystem::PCM_16_BIT ? 2 : 1);
|
|
|
|
((unsigned short *)(&header[20]))[0] = 1; // format
|
|
((unsigned short *)(&header[22]))[0] = channels; // channels
|
|
((uint32_t *)(&header[24]))[0] = rate; // samplerate
|
|
((uint32_t *)(&header[28]))[0] = rate * sampleSizeInByte * channels;// byterate
|
|
((unsigned short *)(&header[32]))[0] = sampleSizeInByte * channels; // block align
|
|
((unsigned short *)(&header[34]))[0] = sampleSizeInByte * 8; // bits per sample
|
|
|
|
header[36] = 'd';
|
|
header[37] = 'a';
|
|
header[38] = 't';
|
|
header[39] = 'a';
|
|
|
|
((uint32_t *)(&header[40]))[0] = samples * 2; // size of data
|
|
|
|
// Skip back to the beginning and rewrite the header
|
|
fseek(pForAfter->outputFile, 0, SEEK_SET);
|
|
fwrite(header, 1, 44, pForAfter->outputFile);
|
|
|
|
fflush(pForAfter->outputFile);
|
|
fclose(pForAfter->outputFile);
|
|
|
|
delete pForAfter;
|
|
pForAfter = NULL;
|
|
|
|
env->ReleaseStringUTFChars(textJavaString, textNativeString);
|
|
env->ReleaseStringUTFChars(filenameJavaString, filenameNativeString);
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
static int
|
|
android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData,
|
|
jstring textJavaString, jint javaStreamType)
|
|
{
|
|
int result = TTS_FAILURE;
|
|
|
|
if (jniData == 0) {
|
|
LOGE("android_tts_SynthProxy_speak(): invalid JNI data");
|
|
return result;
|
|
}
|
|
|
|
initializeFilter();
|
|
|
|
Mutex::Autolock l(engineMutex);
|
|
|
|
SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
|
|
|
|
pSynthData->mPlayLock.lock();
|
|
pSynthData->mPlayState = SYNTHPLAYSTATE_IS_PLAYING;
|
|
pSynthData->mPlayLock.unlock();
|
|
|
|
afterSynthData_t* pForAfter = new (afterSynthData_t);
|
|
pForAfter->jniStorage = jniData;
|
|
pForAfter->usageMode = USAGEMODE_PLAY_IMMEDIATELY;
|
|
pForAfter->streamType = (AudioSystem::stream_type) javaStreamType;
|
|
|
|
if (pSynthData->mNativeSynthInterface) {
|
|
const char *textNativeString = env->GetStringUTFChars(textJavaString, 0);
|
|
memset(pSynthData->mBuffer, 0, pSynthData->mBufferSize);
|
|
result = pSynthData->mNativeSynthInterface->synthesizeText(textNativeString,
|
|
pSynthData->mBuffer, pSynthData->mBufferSize, (void *)pForAfter);
|
|
env->ReleaseStringUTFChars(textJavaString, textNativeString);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
static int
|
|
android_tts_SynthProxy_stop(JNIEnv *env, jobject thiz, jint jniData)
|
|
{
|
|
int result = TTS_FAILURE;
|
|
|
|
if (jniData == 0) {
|
|
LOGE("android_tts_SynthProxy_stop(): invalid JNI data");
|
|
return result;
|
|
}
|
|
|
|
SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
|
|
|
|
pSynthData->mPlayLock.lock();
|
|
pSynthData->mPlayState = SYNTHPLAYSTATE_IS_STOPPED;
|
|
if (pSynthData->mAudioOut) {
|
|
pSynthData->mAudioOut->stop();
|
|
}
|
|
pSynthData->mPlayLock.unlock();
|
|
|
|
if (pSynthData->mNativeSynthInterface) {
|
|
result = pSynthData->mNativeSynthInterface->stop();
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
static int
|
|
android_tts_SynthProxy_stopSync(JNIEnv *env, jobject thiz, jint jniData)
|
|
{
|
|
int result = TTS_FAILURE;
|
|
|
|
if (jniData == 0) {
|
|
LOGE("android_tts_SynthProxy_stop(): invalid JNI data");
|
|
return result;
|
|
}
|
|
|
|
// perform a regular stop
|
|
result = android_tts_SynthProxy_stop(env, thiz, jniData);
|
|
// but wait on the engine having released the engine mutex which protects
|
|
// the synthesizer resources.
|
|
engineMutex.lock();
|
|
engineMutex.unlock();
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
// Queries the engine's current locale and returns it to Java as a
// String[3] of {language, country, variant}, or NULL on invalid state.
static jobjectArray
android_tts_SynthProxy_getLanguage(JNIEnv *env, jobject thiz, jint jniData)
{
    if (jniData == 0) {
        LOGE("android_tts_SynthProxy_getLanguage(): invalid JNI data");
        return NULL;
    }

    SynthProxyJniStorage* storage = (SynthProxyJniStorage*)jniData;

    if (storage->mNativeSynthInterface == NULL) {
        return NULL;
    }

    size_t bufSize = 100;
    char lang[bufSize];
    char country[bufSize];
    char variant[bufSize];
    memset(lang, 0, bufSize);
    memset(country, 0, bufSize);
    memset(variant, 0, bufSize);

    jobjectArray retLocale = (jobjectArray)env->NewObjectArray(3,
            env->FindClass("java/lang/String"), env->NewStringUTF(""));
    storage->mNativeSynthInterface->getLanguage(lang, country, variant);
    env->SetObjectArrayElement(retLocale, 0, env->NewStringUTF(lang));
    env->SetObjectArrayElement(retLocale, 1, env->NewStringUTF(country));
    env->SetObjectArrayElement(retLocale, 2, env->NewStringUTF(variant));
    return retLocale;
}
|
|
|
|
|
|
// Queries the engine's "rate" property and parses it as an integer;
// returns 0 on invalid state or when the engine reports nothing.
JNIEXPORT int JNICALL
android_tts_SynthProxy_getRate(JNIEnv *env, jobject thiz, jint jniData)
{
    if (jniData == 0) {
        LOGE("android_tts_SynthProxy_getRate(): invalid JNI data");
        return 0;
    }

    SynthProxyJniStorage* storage = (SynthProxyJniStorage*)jniData;
    size_t bufSize = 100;

    char buf[bufSize];
    memset(buf, 0, bufSize);
    // TODO check return codes
    if (storage->mNativeSynthInterface) {
        storage->mNativeSynthInterface->getProperty("rate", buf, &bufSize);
    }
    return atoi(buf);
}
|
|
|
|
// Dalvik VM type signatures
|
|
// Native method registration table for android.tts.SynthProxy.
// Each entry is { Java method name, JNI type signature, function pointer };
// registered with the VM in JNI_OnLoad.
static JNINativeMethod gMethods[] = {
    {   "native_stop",
        "(I)I",
        (void*)android_tts_SynthProxy_stop
    },
    {   "native_stopSync",
        "(I)I",
        (void*)android_tts_SynthProxy_stopSync
    },
    {   "native_speak",
        "(ILjava/lang/String;I)I",
        (void*)android_tts_SynthProxy_speak
    },
    {   "native_synthesizeToFile",
        "(ILjava/lang/String;Ljava/lang/String;)I",
        (void*)android_tts_SynthProxy_synthesizeToFile
    },
    {   "native_isLanguageAvailable",
        "(ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;)I",
        (void*)android_tts_SynthProxy_isLanguageAvailable
    },
    {   "native_setConfig",
        "(ILjava/lang/String;)I",
        (void*)android_tts_SynthProxy_setConfig
    },
    {   "native_setLanguage",
        "(ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;)I",
        (void*)android_tts_SynthProxy_setLanguage
    },
    {   "native_loadLanguage",
        "(ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;)I",
        (void*)android_tts_SynthProxy_loadLanguage
    },
    {   "native_setSpeechRate",
        "(II)I",
        (void*)android_tts_SynthProxy_setSpeechRate
    },
    {   "native_setPitch",
        "(II)I",
        (void*)android_tts_SynthProxy_setPitch
    },
    {   "native_getLanguage",
        "(I)[Ljava/lang/String;",
        (void*)android_tts_SynthProxy_getLanguage
    },
    {   "native_getRate",
        "(I)I",
        (void*)android_tts_SynthProxy_getRate
    },
    {   "native_shutdown",
        "(I)V",
        (void*)android_tts_SynthProxy_shutdown
    },
    {   "native_setup",
        "(Ljava/lang/Object;Ljava/lang/String;Ljava/lang/String;)I",
        (void*)android_tts_SynthProxy_native_setup
    },
    {   "native_setLowShelf",
        "(ZFFFF)I",
        (void*)android_tts_SynthProxy_setLowShelf
    },
    {   "native_finalize",
        "(I)V",
        (void*)android_tts_SynthProxy_native_finalize
    }
};
|
|
|
|
// Name of the Java int field caching the native SynthProxyJniStorage*.
#define SP_JNIDATA_FIELD_NAME                "mJniData"
// Name of the static Java callback invoked when synthesis has produced data.
#define SP_POSTSPEECHSYNTHESIZED_METHOD_NAME "postNativeSpeechSynthesizedInJava"

// Fully qualified name of the Java peer class.
static const char* const kClassPathName = "android/tts/SynthProxy";
|
|
|
|
// Library entry point: resolves and caches the Java-side class, field and
// method IDs, then registers the native methods. Returns the required JNI
// version on success, -1 on any failure.
jint JNI_OnLoad(JavaVM* vm, void* reserved)
{
    JNIEnv* env = NULL;
    jint result = -1;
    jclass clazz;

    if (vm->GetEnv((void**) &env, JNI_VERSION_1_4) != JNI_OK) {
        LOGE("ERROR: GetEnv failed\n");
        goto bail;
    }
    assert(env != NULL);

    clazz = env->FindClass(kClassPathName);
    if (clazz == NULL) {
        LOGE("Can't find %s", kClassPathName);
        goto bail;
    }

    // BUGFIX: FindClass returns a *local* reference, which becomes invalid
    // once JNI_OnLoad returns; cache a global reference instead.
    javaTTSFields.synthProxyClass = (jclass)env->NewGlobalRef(clazz);
    javaTTSFields.synthProxyFieldJniData = NULL;
    javaTTSFields.synthProxyMethodPost = NULL;

    javaTTSFields.synthProxyFieldJniData = env->GetFieldID(clazz,
            SP_JNIDATA_FIELD_NAME, "I");
    if (javaTTSFields.synthProxyFieldJniData == NULL) {
        LOGE("Can't find %s.%s field", kClassPathName, SP_JNIDATA_FIELD_NAME);
        goto bail;
    }

    javaTTSFields.synthProxyMethodPost = env->GetStaticMethodID(clazz,
            SP_POSTSPEECHSYNTHESIZED_METHOD_NAME, "(Ljava/lang/Object;II)V");
    if (javaTTSFields.synthProxyMethodPost == NULL) {
        LOGE("Can't find %s.%s method", kClassPathName, SP_POSTSPEECHSYNTHESIZED_METHOD_NAME);
        goto bail;
    }

    if (jniRegisterNativeMethods(
            env, kClassPathName, gMethods, NELEM(gMethods)) < 0)
        goto bail;

    /* success -- return valid version number */
    result = JNI_VERSION_1_4;

 bail:
    return result;
}
|