M7350v1_en_gpl

This commit is contained in:
T
2024-09-09 08:52:07 +00:00
commit f9cc65cfda
65988 changed files with 26357421 additions and 0 deletions

@@ -0,0 +1,110 @@
/*---------------------------------------------------------------------------*
* MicrophoneInputStream.java *
* *
* Copyright 2007 Nuance Communications, Inc. *
* *
* Licensed under the Apache License, Version 2.0 (the 'License'); *
* you may not use this file except in compliance with the License. *
* *
* You may obtain a copy of the License at *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an 'AS IS' BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
* See the License for the specific language governing permissions and *
* limitations under the License. *
* *
*---------------------------------------------------------------------------*/
package android.speech.srec;
import java.io.IOException;
import java.io.InputStream;
import java.lang.IllegalStateException;
/**
* PCM input stream from the microphone, 16 bits per sample.
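*
* <p>A minimal usage sketch, assuming an 11025 hz microphone with a five
* second fifo; <code>consume()</code> is a placeholder for whatever processes
* the 16 bit little-endian samples:</p>
*
* <pre class="prettyprint">
*
* InputStream mic = new MicrophoneInputStream(11025, 11025 * 5);
* try {
* byte[] buf = new byte[512];
* int n;
* // each read returns some whole number of bytes of 16 bit pcm data
* while ((n = mic.read(buf)) != -1) consume(buf, n);
* } finally {
* mic.close();
* }
*
* </pre>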
*/
public final class MicrophoneInputStream extends InputStream {
static {
System.loadLibrary("srec_jni");
}
private final static String TAG = "MicrophoneInputStream";
private int mAudioRecord = 0;
private byte[] mOneByte = new byte[1];
/**
* MicrophoneInputStream constructor.
* @param sampleRate sample rate of the microphone, typically 11025 or 8000.
* @param fifoDepth depth of the real time fifo, measured in sampleRate clock ticks.
* This determines how long an application may delay before losing data.
*/
public MicrophoneInputStream(int sampleRate, int fifoDepth) throws IOException {
mAudioRecord = AudioRecordNew(sampleRate, fifoDepth);
if (mAudioRecord == 0) throw new IOException("AudioRecord constructor failed - busy?");
int status = AudioRecordStart(mAudioRecord);
if (status != 0) {
close();
throw new IOException("AudioRecord start failed: " + status);
}
}
@Override
public int read() throws IOException {
if (mAudioRecord == 0) throw new IllegalStateException("not open");
int rtn = AudioRecordRead(mAudioRecord, mOneByte, 0, 1);
return rtn == 1 ? ((int)mOneByte[0] & 0xff) : -1;
}
@Override
public int read(byte[] b) throws IOException {
if (mAudioRecord == 0) throw new IllegalStateException("not open");
return AudioRecordRead(mAudioRecord, b, 0, b.length);
}
@Override
public int read(byte[] b, int offset, int length) throws IOException {
if (mAudioRecord == 0) throw new IllegalStateException("not open");
// TODO: should we force all reads to be a multiple of the sample size?
return AudioRecordRead(mAudioRecord, b, offset, length);
}
/**
* Closes this stream.
*/
@Override
public void close() throws IOException {
if (mAudioRecord != 0) {
try {
AudioRecordStop(mAudioRecord);
} finally {
try {
AudioRecordDelete(mAudioRecord);
} finally {
mAudioRecord = 0;
}
}
}
}
@Override
protected void finalize() throws Throwable {
if (mAudioRecord != 0) {
close();
throw new IOException("someone forgot to close MicrophoneInputStream");
}
}
//
// AudioRecord JNI interface
//
private static native int AudioRecordNew(int sampleRate, int fifoDepth);
private static native int AudioRecordStart(int audioRecord);
private static native int AudioRecordRead(int audioRecord, byte[] b, int offset, int length) throws IOException;
private static native void AudioRecordStop(int audioRecord) throws IOException;
private static native void AudioRecordDelete(int audioRecord) throws IOException;
}

@@ -0,0 +1,719 @@
/*
* ---------------------------------------------------------------------------
* Recognizer.java
*
* Copyright 2007 Nuance Communications, Inc.
*
* Licensed under the Apache License, Version 2.0 (the 'License'); you may not
* use this file except in compliance with the License.
*
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*
* ---------------------------------------------------------------------------
*/
package android.speech.srec;
import android.util.Config;
import android.util.Log;
import java.io.File;
import java.io.InputStream;
import java.io.IOException;
import java.util.Locale;
/**
* Simple, synchronous speech recognizer, using the Nuance SREC package.
* Usage proceeds as follows:
*
* <ul>
* <li>Create a <code>Recognizer</code>.
* <li>Create a <code>Recognizer.Grammar</code>.
* <li>Setup the <code>Recognizer.Grammar</code>.
* <li>Reset the <code>Recognizer.Grammar</code> slots, if needed.
* <li>Fill the <code>Recognizer.Grammar</code> slots, if needed.
* <li>Compile the <code>Recognizer.Grammar</code>, if needed.
* <li>Save the filled <code>Recognizer.Grammar</code>, if needed.
* <li>Start the <code>Recognizer</code>.
* <li>Loop over <code>advance</code> and <code>putAudio</code> until recognition complete.
* <li>Fetch and process results, or notify of failure.
* <li>Stop the <code>Recognizer</code>.
* <li>Destroy the <code>Recognizer</code>.
* </ul>
*
* <p>Below is example code</p>
*
* <pre class="prettyprint">
*
* // create and start audio input
* InputStream audio = new MicrophoneInputStream(11025, 11025*5);
* // create a Recognizer
* String cdir = Recognizer.getConfigDir(null);
* Recognizer recognizer = new Recognizer(cdir + "/baseline11k.par");
* // create and load a Grammar
* Recognizer.Grammar grammar = recognizer.new Grammar(cdir + "/grammars/VoiceDialer.g2g");
* // setup the Grammar to work with the Recognizer
* grammar.setupRecognizer();
* // fill the Grammar slots with names and save, if required
* grammar.resetAllSlots();
* for (String name : names) grammar.addWordToSlot("@Names", name, null, 1, "V=1");
* grammar.compile();
* grammar.save(".../foo.g2g");
* // start the Recognizer
* recognizer.start();
* // loop over Recognizer events
* while (true) {
* switch (recognizer.advance()) {
* case Recognizer.EVENT_INCOMPLETE:
* case Recognizer.EVENT_STARTED:
* case Recognizer.EVENT_START_OF_VOICING:
* case Recognizer.EVENT_END_OF_VOICING:
* // let the Recognizer continue to run
* continue;
* case Recognizer.EVENT_RECOGNITION_RESULT:
* // success, so fetch results here!
* for (int i = 0; i < recognizer.getResultCount(); i++) {
* String result = recognizer.getResult(i, Recognizer.KEY_LITERAL);
* }
* break;
* case Recognizer.EVENT_NEED_MORE_AUDIO:
* // put more audio in the Recognizer
* recognizer.putAudio(audio);
* continue;
* default:
* notifyFailure();
* break;
* }
* break;
* }
* // stop the Recognizer
* recognizer.stop();
* // destroy the Recognizer
* recognizer.destroy();
* // stop the audio device
* audio.close();
*
* </pre>
*/
public final class Recognizer {
static {
System.loadLibrary("srec_jni");
}
private static String TAG = "Recognizer";
/**
* Result key corresponding to confidence score.
*/
public static final String KEY_CONFIDENCE = "conf";
/**
* Result key corresponding to literal text.
*/
public static final String KEY_LITERAL = "literal";
/**
* Result key corresponding to semantic meaning text.
*/
public static final String KEY_MEANING = "meaning";
// handle to SR_Vocabulary object
private int mVocabulary = 0;
// handle to SR_Recognizer object
private int mRecognizer = 0;
// Grammar currently associated with Recognizer via SR_GrammarSetupRecognizer
private Grammar mActiveGrammar = null;
/**
* Get the pathname of the SREC configuration directory corresponding to the
* language indicated by the Locale.
* This directory contains dictionaries, speech models,
* configuration files, and other data needed by the Recognizer.
* @param locale <code>Locale</code> corresponding to the desired language,
* or null for default, currently <code>Locale.US</code>.
* @return Pathname of the configuration directory.
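*
* <p>For example, <code>getConfigDir(Locale.US)</code> returns
* <code>"/system/usr/srec/config/en.us"</code> if that directory exists,
* or null if it does not.</p>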
*/
public static String getConfigDir(Locale locale) {
if (locale == null) locale = Locale.US;
String dir = "/system/usr/srec/config/" +
locale.toString().replace('_', '.').toLowerCase();
if ((new File(dir)).isDirectory()) return dir;
return null;
}
/**
* Create an instance of a SREC speech recognizer.
*
* @param configFile pathname of the baseline*.par configuration file,
* which in turn contains references to dictionaries, speech models,
* and other data needed to configure and operate the recognizer.
* A separate config file is needed for each audio sample rate.
* Two files, baseline11k.par and baseline8k.par, which correspond to
* 11025 and 8000 hz, are present in the directory indicated by
* {@link #getConfigDir}.
* @throws IOException
*/
public Recognizer(String configFile) throws IOException {
PMemInit();
SR_SessionCreate(configFile);
mRecognizer = SR_RecognizerCreate();
SR_RecognizerSetup(mRecognizer);
mVocabulary = SR_VocabularyLoad();
}
/**
* Represents a grammar loaded into the Recognizer.
*/
public class Grammar {
private int mGrammar = 0;
/**
* Create a <code>Grammar</code> instance.
* @param g2gFileName pathname of g2g file.
*/
public Grammar(String g2gFileName) throws IOException {
mGrammar = SR_GrammarLoad(g2gFileName);
SR_GrammarSetupVocabulary(mGrammar, mVocabulary);
}
/**
* Reset all slots.
*/
public void resetAllSlots() {
SR_GrammarResetAllSlots(mGrammar);
}
/**
* Add a word to a slot.
*
* @param slot slot name.
* @param word word to insert.
* @param pron pronunciation, or null to derive from word.
* @param weight weight to give the word. One is normal, 50 is low.
* @param tag semantic meaning tag string.
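*
* <p>For example, mirroring the class-level sample above:
* <code>grammar.addWordToSlot("@Names", name, null, 1, "V=1")</code>.</p>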
*/
public void addWordToSlot(String slot, String word, String pron, int weight, String tag) {
SR_GrammarAddWordToSlot(mGrammar, slot, word, pron, weight, tag);
}
/**
* Compile all slots.
*/
public void compile() {
SR_GrammarCompile(mGrammar);
}
/**
* Setup <code>Grammar</code> with <code>Recognizer</code>.
*/
public void setupRecognizer() {
SR_GrammarSetupRecognizer(mGrammar, mRecognizer);
mActiveGrammar = this;
}
/**
* Save <code>Grammar</code> to g2g file.
*
* @param g2gFileName
* @throws IOException
*/
public void save(String g2gFileName) throws IOException {
SR_GrammarSave(mGrammar, g2gFileName);
}
/**
* Release resources associated with this <code>Grammar</code>.
*/
public void destroy() {
// TODO: need to do cleanup and disassociation with Recognizer
if (mGrammar != 0) {
SR_GrammarDestroy(mGrammar);
mGrammar = 0;
}
}
/**
* Clean up resources.
*/
protected void finalize() {
if (mGrammar != 0) {
destroy();
throw new IllegalStateException("someone forgot to destroy Grammar");
}
}
}
/**
* Start recognition
*/
public void start() {
// TODO: shouldn't be here?
SR_RecognizerActivateRule(mRecognizer, mActiveGrammar.mGrammar, "trash", 1);
SR_RecognizerStart(mRecognizer);
}
/**
* Process some audio and return the current status.
* @return recognition event, one of:
* <ul>
* <li><code>EVENT_INVALID</code>
* <li><code>EVENT_NO_MATCH</code>
* <li><code>EVENT_INCOMPLETE</code>
* <li><code>EVENT_STARTED</code>
* <li><code>EVENT_STOPPED</code>
* <li><code>EVENT_START_OF_VOICING</code>
* <li><code>EVENT_END_OF_VOICING</code>
* <li><code>EVENT_SPOKE_TOO_SOON</code>
* <li><code>EVENT_RECOGNITION_RESULT</code>
* <li><code>EVENT_START_OF_UTTERANCE_TIMEOUT</code>
* <li><code>EVENT_RECOGNITION_TIMEOUT</code>
* <li><code>EVENT_NEED_MORE_AUDIO</code>
* <li><code>EVENT_MAX_SPEECH</code>
* </ul>
*/
public int advance() {
return SR_RecognizerAdvance(mRecognizer);
}
/**
* Put audio samples into the <code>Recognizer</code>.
* @param buf holds the audio samples.
* @param offset offset of the first sample.
* @param length number of bytes containing samples.
* @param isLast indicates no more audio data, normally false.
* @return number of bytes accepted.
*/
public int putAudio(byte[] buf, int offset, int length, boolean isLast) {
return SR_RecognizerPutAudio(mRecognizer, buf, offset, length, isLast);
}
/**
* Read audio samples from an <code>InputStream</code> and put them in the
* <code>Recognizer</code>.
* @param audio <code>InputStream</code> containing PCM audio samples.
*/
public void putAudio(InputStream audio) throws IOException {
// make sure the audio buffer is allocated
if (mPutAudioBuffer == null) mPutAudioBuffer = new byte[512];
// read some data
int nbytes = audio.read(mPutAudioBuffer);
// eof, so signal Recognizer
if (nbytes == -1) {
SR_RecognizerPutAudio(mRecognizer, mPutAudioBuffer, 0, 0, true);
}
// put it into the Recognizer
else if (nbytes != SR_RecognizerPutAudio(mRecognizer, mPutAudioBuffer, 0, nbytes, false)) {
throw new IOException("SR_RecognizerPutAudio failed nbytes=" + nbytes);
}
}
// audio buffer for putAudio(InputStream)
private byte[] mPutAudioBuffer = null;
/**
* Get the number of recognition results. Must be called after
* <code>EVENT_RECOGNITION_RESULT</code> is returned by
* <code>advance</code>, but before <code>stop</code>.
*
* @return number of results in nbest list.
*/
public int getResultCount() {
return SR_RecognizerResultGetSize(mRecognizer);
}
/**
* Get a set of keys for the result. Must be called after
* <code>EVENT_RECOGNITION_RESULT</code> is returned by
* <code>advance</code>, but before <code>stop</code>.
*
* @param index index of result.
* @return array of keys.
*/
public String[] getResultKeys(int index) {
return SR_RecognizerResultGetKeyList(mRecognizer, index);
}
/**
* Get a result value. Must be called after
* <code>EVENT_RECOGNITION_RESULT</code> is returned by
* <code>advance</code>, but before <code>stop</code>.
*
* @param index index of the result.
* @param key key of the result. This is typically one of
* <code>KEY_CONFIDENCE</code>, <code>KEY_LITERAL</code>, or
* <code>KEY_MEANING</code>, but the user can also define their own keys
* in a grxml file, or in the <code>tag</code> slot of
* <code>Grammar.addWordToSlot</code>.
* @return the result.
*/
public String getResult(int index, String key) {
return SR_RecognizerResultGetValue(mRecognizer, index, key);
}
/**
* Stop the <code>Recognizer</code>.
*/
public void stop() {
SR_RecognizerStop(mRecognizer);
SR_RecognizerDeactivateRule(mRecognizer, mActiveGrammar.mGrammar, "trash");
}
/**
* Reset the acoustic state vector to its default value.
*
* @hide
*/
public void resetAcousticState() {
SR_AcousticStateReset(mRecognizer);
}
/**
* Set the acoustic state vector.
* @param state String containing the acoustic state vector.
*
* @hide
*/
public void setAcousticState(String state) {
SR_AcousticStateSet(mRecognizer, state);
}
/**
* Get the acoustic state vector.
* @return String containing the acoustic state vector.
*
* @hide
*/
public String getAcousticState() {
return SR_AcousticStateGet(mRecognizer);
}
/**
* Clean up resources.
*/
public void destroy() {
try {
if (mVocabulary != 0) SR_VocabularyDestroy(mVocabulary);
} finally {
mVocabulary = 0;
try {
if (mRecognizer != 0) SR_RecognizerUnsetup(mRecognizer);
} finally {
try {
if (mRecognizer != 0) SR_RecognizerDestroy(mRecognizer);
} finally {
mRecognizer = 0;
try {
SR_SessionDestroy();
} finally {
PMemShutdown();
}
}
}
}
}
/**
* Clean up resources.
*/
protected void finalize() throws Throwable {
if (mVocabulary != 0 || mRecognizer != 0) {
destroy();
throw new IllegalStateException("someone forgot to destroy Recognizer");
}
}
/* an example session captured, for reference
void doall() {
if (PMemInit ( )
|| lhs_audioinOpen ( WAVE_MAPPER, SREC_TEST_DEFAULT_AUDIO_FREQUENCY, &audio_in_handle )
|| srec_test_init_application_data ( &applicationData, argc, argv )
|| SR_SessionCreate ( "/system/usr/srec/config/en.us/baseline11k.par" )
|| SR_RecognizerCreate ( &applicationData.recognizer )
|| SR_RecognizerSetup ( applicationData.recognizer)
|| ESR_SessionGetLCHAR ( L("cmdline.vocabulary"), filename, &flen )
|| SR_VocabularyLoad ( filename, &applicationData.vocabulary )
|| SR_VocabularyGetLanguage ( applicationData.vocabulary, &applicationData.locale )
|| (applicationData.nametag = NULL)
|| SR_NametagsCreate ( &applicationData.nametags )
|| (LSTRCPY ( applicationData.grammars [0].grammar_path, "/system/usr/srec/config/en.us/grammars/VoiceDialer.g2g" ), 0)
|| (LSTRCPY ( applicationData.grammars [0].grammarID, "BothTags" ), 0)
|| (LSTRCPY ( applicationData.grammars [0].ruleName, "trash" ), 0)
|| (applicationData.grammars [0].is_ve_grammar = ESR_FALSE, 0)
|| SR_GrammarLoad (applicationData.grammars [0].grammar_path, &applicationData.grammars [applicationData.grammarCount].grammar )
|| SR_GrammarSetupVocabulary ( applicationData.grammars [0].grammar, applicationData.vocabulary )
|| SR_GrammarSetupRecognizer( applicationData.grammars [0].grammar, applicationData.recognizer )
|| SR_GrammarSetDispatchFunction ( applicationData.grammars [0].grammar, L("myDSMCallback"), NULL, myDSMCallback )
|| (applicationData.grammarCount++, 0)
|| SR_RecognizerActivateRule ( applicationData.recognizer, applicationData.grammars [0].grammar,
applicationData.grammars [0].ruleName, 1 )
|| (applicationData.active_grammar_num = 0, 0)
|| lhs_audioinStart ( audio_in_handle )
|| SR_RecognizerStart ( applicationData.recognizer )
|| strl ( applicationData.grammars [0].grammar, &applicationData, audio_in_handle, &recognition_count )
|| SR_RecognizerStop ( applicationData.recognizer )
|| lhs_audioinStop ( audio_in_handle )
|| SR_RecognizerDeactivateRule ( applicationData.recognizer, applicationData.grammars [0].grammar, applicationData.grammars [0].ruleName )
|| (applicationData.active_grammar_num = -1, 0)
|| SR_GrammarDestroy ( applicationData.grammars [0].grammar )
|| (applicationData.grammarCount--, 0)
|| SR_NametagsDestroy ( applicationData.nametags )
|| (applicationData.nametags = NULL, 0)
|| SR_VocabularyDestroy ( applicationData.vocabulary )
|| (applicationData.vocabulary = NULL)
|| SR_RecognizerUnsetup ( applicationData.recognizer) // releases acoustic models
|| SR_RecognizerDestroy ( applicationData.recognizer )
|| (applicationData.recognizer = NULL)
|| SR_SessionDestroy ( )
|| srec_test_shutdown_application_data ( &applicationData )
|| lhs_audioinClose ( &audio_in_handle )
|| PMemShutdown ( )
}
*/
//
// PMem native methods
//
private static native void PMemInit();
private static native void PMemShutdown();
//
// SR_Session native methods
//
private static native void SR_SessionCreate(String filename);
private static native void SR_SessionDestroy();
//
// SR_Recognizer native methods
//
/**
* Reserved value.
*/
public final static int EVENT_INVALID = 0;
/**
* <code>Recognizer</code> could not find a match for the utterance.
*/
public final static int EVENT_NO_MATCH = 1;
/**
* <code>Recognizer</code> processed one frame of audio.
*/
public final static int EVENT_INCOMPLETE = 2;
/**
* <code>Recognizer</code> has just been started.
*/
public final static int EVENT_STARTED = 3;
/**
* <code>Recognizer</code> is stopped.
*/
public final static int EVENT_STOPPED = 4;
/**
* Beginning of speech detected.
*/
public final static int EVENT_START_OF_VOICING = 5;
/**
* End of speech detected.
*/
public final static int EVENT_END_OF_VOICING = 6;
/**
* Beginning of utterance occurred too soon.
*/
public final static int EVENT_SPOKE_TOO_SOON = 7;
/**
* Recognition match detected.
*/
public final static int EVENT_RECOGNITION_RESULT = 8;
/**
* Timeout occurred before beginning of utterance.
*/
public final static int EVENT_START_OF_UTTERANCE_TIMEOUT = 9;
/**
* Timeout occurred before speech recognition could complete.
*/
public final static int EVENT_RECOGNITION_TIMEOUT = 10;
/**
* Not enough samples to process one frame.
*/
public final static int EVENT_NEED_MORE_AUDIO = 11;
/**
* More audio encountered than is allowed by 'swirec_max_speech_duration'.
*/
public final static int EVENT_MAX_SPEECH = 12;
/**
* Produce a displayable string from an <code>advance</code> event.
* @param event
* @return String representing the event.
*/
public static String eventToString(int event) {
switch (event) {
case EVENT_INVALID:
return "EVENT_INVALID";
case EVENT_NO_MATCH:
return "EVENT_NO_MATCH";
case EVENT_INCOMPLETE:
return "EVENT_INCOMPLETE";
case EVENT_STARTED:
return "EVENT_STARTED";
case EVENT_STOPPED:
return "EVENT_STOPPED";
case EVENT_START_OF_VOICING:
return "EVENT_START_OF_VOICING";
case EVENT_END_OF_VOICING:
return "EVENT_END_OF_VOICING";
case EVENT_SPOKE_TOO_SOON:
return "EVENT_SPOKE_TOO_SOON";
case EVENT_RECOGNITION_RESULT:
return "EVENT_RECOGNITION_RESULT";
case EVENT_START_OF_UTTERANCE_TIMEOUT:
return "EVENT_START_OF_UTTERANCE_TIMEOUT";
case EVENT_RECOGNITION_TIMEOUT:
return "EVENT_RECOGNITION_TIMEOUT";
case EVENT_NEED_MORE_AUDIO:
return "EVENT_NEED_MORE_AUDIO";
case EVENT_MAX_SPEECH:
return "EVENT_MAX_SPEECH";
}
return "EVENT_" + event;
}
//
// SR_Recognizer methods
//
private static native void SR_RecognizerStart(int recognizer);
private static native void SR_RecognizerStop(int recognizer);
private static native int SR_RecognizerCreate();
private static native void SR_RecognizerDestroy(int recognizer);
private static native void SR_RecognizerSetup(int recognizer);
private static native void SR_RecognizerUnsetup(int recognizer);
private static native boolean SR_RecognizerIsSetup(int recognizer);
private static native String SR_RecognizerGetParameter(int recognizer, String key);
private static native int SR_RecognizerGetSize_tParameter(int recognizer, String key);
private static native boolean SR_RecognizerGetBoolParameter(int recognizer, String key);
private static native void SR_RecognizerSetParameter(int recognizer, String key, String value);
private static native void SR_RecognizerSetSize_tParameter(int recognizer,
String key, int value);
private static native void SR_RecognizerSetBoolParameter(int recognizer, String key,
boolean value);
private static native void SR_RecognizerSetupRule(int recognizer, int grammar,
String ruleName);
private static native boolean SR_RecognizerHasSetupRules(int recognizer);
private static native void SR_RecognizerActivateRule(int recognizer, int grammar,
String ruleName, int weight);
private static native void SR_RecognizerDeactivateRule(int recognizer, int grammar,
String ruleName);
private static native void SR_RecognizerDeactivateAllRules(int recognizer);
private static native boolean SR_RecognizerIsActiveRule(int recognizer, int grammar,
String ruleName);
private static native boolean SR_RecognizerCheckGrammarConsistency(int recognizer,
int grammar);
private static native int SR_RecognizerPutAudio(int recognizer, byte[] buffer, int offset,
int length, boolean isLast);
private static native int SR_RecognizerAdvance(int recognizer);
// private static native void SR_RecognizerLoadUtterance(int recognizer,
// const LCHAR* filename);
// private static native void SR_RecognizerLoadWaveFile(int recognizer,
// const LCHAR* filename);
// private static native void SR_RecognizerSetLockFunction(int recognizer,
// SR_RecognizerLockFunction function, void* data);
private static native boolean SR_RecognizerIsSignalClipping(int recognizer);
private static native boolean SR_RecognizerIsSignalDCOffset(int recognizer);
private static native boolean SR_RecognizerIsSignalNoisy(int recognizer);
private static native boolean SR_RecognizerIsSignalTooQuiet(int recognizer);
private static native boolean SR_RecognizerIsSignalTooFewSamples(int recognizer);
private static native boolean SR_RecognizerIsSignalTooManySamples(int recognizer);
// private static native void SR_Recognizer_Change_Sample_Rate (size_t new_sample_rate);
//
// SR_AcousticState native methods
//
private static native void SR_AcousticStateReset(int recognizer);
private static native void SR_AcousticStateSet(int recognizer, String state);
private static native String SR_AcousticStateGet(int recognizer);
//
// SR_Grammar native methods
//
private static native void SR_GrammarCompile(int grammar);
private static native void SR_GrammarAddWordToSlot(int grammar, String slot,
String word, String pronunciation, int weight, String tag);
private static native void SR_GrammarResetAllSlots(int grammar);
// private static native void SR_GrammarAddNametagToSlot(int grammar, String slot,
// const struct SR_Nametag_t* nametag, int weight, String tag);
private static native void SR_GrammarSetupVocabulary(int grammar, int vocabulary);
// private static native void SR_GrammarSetupModels(int grammar, SR_AcousticModels* models);
private static native void SR_GrammarSetupRecognizer(int grammar, int recognizer);
private static native void SR_GrammarUnsetupRecognizer(int grammar);
// private static native void SR_GrammarGetModels(int grammar,SR_AcousticModels** models);
private static native int SR_GrammarCreate();
private static native void SR_GrammarDestroy(int grammar);
private static native int SR_GrammarLoad(String filename);
private static native void SR_GrammarSave(int grammar, String filename);
// private static native void SR_GrammarSetDispatchFunction(int grammar,
// const LCHAR* name, void* userData, SR_GrammarDispatchFunction function);
// private static native void SR_GrammarSetParameter(int grammar, const
// LCHAR* key, void* value);
// private static native void SR_GrammarSetSize_tParameter(int grammar,
// const LCHAR* key, size_t value);
// private static native void SR_GrammarGetParameter(int grammar, const
// LCHAR* key, void** value);
// private static native void SR_GrammarGetSize_tParameter(int grammar,
// const LCHAR* key, size_t* value);
// private static native void SR_GrammarCheckParse(int grammar, const LCHAR*
// transcription, SR_SemanticResult** result, size_t* resultCount);
private static native void SR_GrammarAllowOnly(int grammar, String transcription);
private static native void SR_GrammarAllowAll(int grammar);
//
// SR_Vocabulary native methods
//
// private static native int SR_VocabularyCreate();
private static native int SR_VocabularyLoad();
// private static native void SR_VocabularySave(SR_Vocabulary* self,
// const LCHAR* filename);
// private static native void SR_VocabularyAddWord(SR_Vocabulary* self,
// const LCHAR* word);
// private static native void SR_VocabularyGetLanguage(SR_Vocabulary* self,
// ESR_Locale* locale);
private static native void SR_VocabularyDestroy(int vocabulary);
private static native String SR_VocabularyGetPronunciation(int vocabulary, String word);
//
// SR_RecognizerResult native methods
//
private static native byte[] SR_RecognizerResultGetWaveform(int recognizer);
private static native int SR_RecognizerResultGetSize(int recognizer);
private static native int SR_RecognizerResultGetKeyCount(int recognizer, int nbest);
private static native String[] SR_RecognizerResultGetKeyList(int recognizer, int nbest);
private static native String SR_RecognizerResultGetValue(int recognizer,
int nbest, String key);
// private static native void SR_RecognizerResultGetLocale(int recognizer, ESR_Locale* locale);
}

@@ -0,0 +1,187 @@
/*
* ---------------------------------------------------------------------------
* UlawEncoderInputStream.java
*
* Copyright 2008 Nuance Communications, Inc.
*
* Licensed under the Apache License, Version 2.0 (the 'License'); you may not
* use this file except in compliance with the License.
*
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*
* ---------------------------------------------------------------------------
*/
package android.speech.srec;
import java.io.IOException;
import java.io.InputStream;
/**
* InputStream which transforms 16 bit pcm data to ulaw data.
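*
* <p>A minimal usage sketch, where <code>pcmIn</code> (a 16 bit little-endian
* pcm source) and <code>out</code> (a sink for the encoded bytes) are
* placeholders; passing 0 for <code>max</code> leaves the samples
* unscaled:</p>
*
* <pre class="prettyprint">
*
* InputStream ulaw = new UlawEncoderInputStream(pcmIn, 0);
* byte[] buf = new byte[512];
* int n;
* // each output byte is one ulaw sample, consuming two pcm bytes
* while ((n = ulaw.read(buf)) != -1) out.write(buf, 0, n);
* ulaw.close();
*
* </pre>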
*
* Not yet ready to be supported, so
* @hide
*/
public final class UlawEncoderInputStream extends InputStream {
private final static String TAG = "UlawEncoderInputStream";
private final static int MAX_ULAW = 8192;
private final static int SCALE_BITS = 16;
private InputStream mIn;
private int mMax = 0;
private final byte[] mBuf = new byte[1024];
private int mBufCount = 0; // should be 0 or 1
private final byte[] mOneByte = new byte[1];
public static void encode(byte[] pcmBuf, int pcmOffset,
byte[] ulawBuf, int ulawOffset, int length, int max) {
// from 'ulaw' in wikipedia
// +8191 to +8159 0x80
// +8158 to +4063 in 16 intervals of 256 0x80 + interval number
// +4062 to +2015 in 16 intervals of 128 0x90 + interval number
// +2014 to +991 in 16 intervals of 64 0xA0 + interval number
// +990 to +479 in 16 intervals of 32 0xB0 + interval number
// +478 to +223 in 16 intervals of 16 0xC0 + interval number
// +222 to +95 in 16 intervals of 8 0xD0 + interval number
// +94 to +31 in 16 intervals of 4 0xE0 + interval number
// +30 to +1 in 15 intervals of 2 0xF0 + interval number
// 0 0xFF
// -1 0x7F
// -31 to -2 in 15 intervals of 2 0x70 + interval number
// -95 to -32 in 16 intervals of 4 0x60 + interval number
// -223 to -96 in 16 intervals of 8 0x50 + interval number
// -479 to -224 in 16 intervals of 16 0x40 + interval number
// -991 to -480 in 16 intervals of 32 0x30 + interval number
// -2015 to -992 in 16 intervals of 64 0x20 + interval number
// -4063 to -2016 in 16 intervals of 128 0x10 + interval number
// -8159 to -4064 in 16 intervals of 256 0x00 + interval number
// -8192 to -8160 0x00
// set scale factors
if (max <= 0) max = MAX_ULAW;
int coef = MAX_ULAW * (1 << SCALE_BITS) / max;
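// e.g. max == 2 * MAX_ULAW gives coef == 1 << (SCALE_BITS - 1),
// so (pcm * coef) >> SCALE_BITS halves each sample before encoding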
for (int i = 0; i < length; i++) {
int pcm = (0xff & pcmBuf[pcmOffset++]) + (pcmBuf[pcmOffset++] << 8);
pcm = (pcm * coef) >> SCALE_BITS;
int ulaw;
if (pcm >= 0) {
ulaw = pcm <= 0 ? 0xff :
pcm <= 30 ? 0xf0 + (( 30 - pcm) >> 1) :
pcm <= 94 ? 0xe0 + (( 94 - pcm) >> 2) :
pcm <= 222 ? 0xd0 + (( 222 - pcm) >> 3) :
pcm <= 478 ? 0xc0 + (( 478 - pcm) >> 4) :
pcm <= 990 ? 0xb0 + (( 990 - pcm) >> 5) :
pcm <= 2014 ? 0xa0 + ((2014 - pcm) >> 6) :
pcm <= 4062 ? 0x90 + ((4062 - pcm) >> 7) :
pcm <= 8158 ? 0x80 + ((8158 - pcm) >> 8) :
0x80;
} else {
ulaw = -1 <= pcm ? 0x7f :
-31 <= pcm ? 0x70 + ((pcm - -31) >> 1) :
-95 <= pcm ? 0x60 + ((pcm - -95) >> 2) :
-223 <= pcm ? 0x50 + ((pcm - -223) >> 3) :
-479 <= pcm ? 0x40 + ((pcm - -479) >> 4) :
-991 <= pcm ? 0x30 + ((pcm - -991) >> 5) :
-2015 <= pcm ? 0x20 + ((pcm - -2015) >> 6) :
-4063 <= pcm ? 0x10 + ((pcm - -4063) >> 7) :
-8159 <= pcm ? 0x00 + ((pcm - -8159) >> 8) :
0x00;
}
ulawBuf[ulawOffset++] = (byte)ulaw;
}
}
/**
* Compute the maximum of the absolute value of the pcm samples.
* The return value can be used to set ulaw encoder scaling.
* @param pcmBuf array containing 16 bit pcm data.
* @param offset offset of start of 16 bit pcm data.
* @param length number of pcm samples (not number of input bytes)
* @return maximum abs of pcm data values
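*
* <p>A sketch of the intended two-pass use, assuming the samples are already
* buffered in a byte array <code>pcm</code>:</p>
*
* <pre class="prettyprint">
*
* int max = UlawEncoderInputStream.maxAbsPcm(pcm, 0, pcm.length / 2);
* InputStream ulaw = new UlawEncoderInputStream(new ByteArrayInputStream(pcm), max);
*
* </pre>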
*/
public static int maxAbsPcm(byte[] pcmBuf, int offset, int length) {
int max = 0;
for (int i = 0; i < length; i++) {
int pcm = (0xff & pcmBuf[offset++]) + (pcmBuf[offset++] << 8);
if (pcm < 0) pcm = -pcm;
if (pcm > max) max = pcm;
}
return max;
}
/**
* Create an InputStream which takes 16 bit pcm data and produces ulaw data.
* @param in InputStream containing 16 bit pcm data.
* @param max pcm value corresponding to maximum ulaw value.
*/
public UlawEncoderInputStream(InputStream in, int max) {
mIn = in;
mMax = max;
}
@Override
public int read(byte[] buf, int offset, int length) throws IOException {
if (mIn == null) throw new IllegalStateException("not open");
// return at least one byte, but try to fill 'length'
while (mBufCount < 2) {
int n = mIn.read(mBuf, mBufCount, Math.min(length * 2, mBuf.length - mBufCount));
if (n == -1) return -1;
mBufCount += n;
}
// compand data
int n = Math.min(mBufCount / 2, length);
encode(mBuf, 0, buf, offset, n, mMax);
// move data to bottom of mBuf
mBufCount -= n * 2;
for (int i = 0; i < mBufCount; i++) mBuf[i] = mBuf[i + n * 2];
return n;
}
@Override
public int read(byte[] buf) throws IOException {
return read(buf, 0, buf.length);
}
@Override
public int read() throws IOException {
int n = read(mOneByte, 0, 1);
if (n == -1) return -1;
return 0xff & (int)mOneByte[0];
}
@Override
public void close() throws IOException {
if (mIn != null) {
InputStream in = mIn;
mIn = null;
in.close();
}
}
@Override
public int available() throws IOException {
return (mIn.available() + mBufCount) / 2;
}
}

@@ -0,0 +1,276 @@
/*
* Copyright (C) 2009 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package android.speech.srec;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
/**
* This class represents the header of a WAVE format audio file, which usually
* has a .wav suffix. The following integer valued fields are contained:
* <ul>
* <li> format - usually PCM, ALAW or ULAW.
* <li> numChannels - 1 for mono, 2 for stereo.
* <li> sampleRate - usually 8000, 11025, 16000, 22050, or 44100 hz.
* <li> bitsPerSample - usually 16 for PCM, 8 for ALAW, or 8 for ULAW.
* <li> numBytes - size of audio data after this header, in bytes.
* </ul>
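*
* <p>A minimal write-side sketch, with assumed values for illustration (mono
* 16 bit pcm at 8000 hz, 4000 bytes of audio to follow) and an assumed
* destination stream <code>out</code>:</p>
*
* <pre class="prettyprint">
*
* WaveHeader hdr = new WaveHeader(WaveHeader.FORMAT_PCM, (short)1, 8000, (short)16, 4000);
* hdr.write(out); // emits the 44 byte RIFF/WAVE header; audio data follows
*
* </pre>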
*
* Not yet ready to be supported, so
* @hide
*/
public class WaveHeader {
// follows WAVE format in http://ccrma.stanford.edu/courses/422/projects/WaveFormat
private static final String TAG = "WaveHeader";
private static final int HEADER_LENGTH = 44;
/** Indicates PCM format. */
public static final short FORMAT_PCM = 1;
/** Indicates ALAW format. */
public static final short FORMAT_ALAW = 6;
/** Indicates ULAW format. */
public static final short FORMAT_ULAW = 7;
private short mFormat;
private short mNumChannels;
private int mSampleRate;
private short mBitsPerSample;
private int mNumBytes;
/**
* Construct a WaveHeader, with all fields defaulting to zero.
*/
public WaveHeader() {
}
/**
* Construct a WaveHeader, with fields initialized.
* @param format format of audio data,
* one of {@link #FORMAT_PCM}, {@link #FORMAT_ULAW}, or {@link #FORMAT_ALAW}.
* @param numChannels 1 for mono, 2 for stereo.
* @param sampleRate typically 8000, 11025, 16000, 22050, or 44100 hz.
* @param bitsPerSample usually 16 for PCM, 8 for ULAW or 8 for ALAW.
* @param numBytes size of audio data after this header, in bytes.
*/
public WaveHeader(short format, short numChannels, int sampleRate, short bitsPerSample, int numBytes) {
mFormat = format;
mSampleRate = sampleRate;
mNumChannels = numChannels;
mBitsPerSample = bitsPerSample;
mNumBytes = numBytes;
}
/**
* Get the format field.
* @return format field,
* one of {@link #FORMAT_PCM}, {@link #FORMAT_ULAW}, or {@link #FORMAT_ALAW}.
*/
public short getFormat() {
return mFormat;
}
/**
* Set the format field.
* @param format
* one of {@link #FORMAT_PCM}, {@link #FORMAT_ULAW}, or {@link #FORMAT_ALAW}.
* @return reference to this WaveHeader instance.
*/
public WaveHeader setFormat(short format) {
mFormat = format;
return this;
}
/**
* Get the number of channels.
* @return number of channels, 1 for mono, 2 for stereo.
*/
public short getNumChannels() {
return mNumChannels;
}
/**
* Set the number of channels.
* @param numChannels 1 for mono, 2 for stereo.
* @return reference to this WaveHeader instance.
*/
public WaveHeader setNumChannels(short numChannels) {
mNumChannels = numChannels;
return this;
}
/**
* Get the sample rate.
* @return sample rate, typically 8000, 11025, 16000, 22050, or 44100 hz.
*/
public int getSampleRate() {
return mSampleRate;
}
/**
* Set the sample rate.
* @param sampleRate sample rate, typically 8000, 11025, 16000, 22050, or 44100 hz.
* @return reference to this WaveHeader instance.
*/
public WaveHeader setSampleRate(int sampleRate) {
mSampleRate = sampleRate;
return this;
}
/**
* Get the number of bits per sample.
* @return number of bits per sample,
* usually 16 for PCM, 8 for ULAW or 8 for ALAW.
*/
public short getBitsPerSample() {
return mBitsPerSample;
}
/**
* Set the number of bits per sample.
* @param bitsPerSample number of bits per sample,
* usually 16 for PCM, 8 for ULAW or 8 for ALAW.
* @return reference to this WaveHeader instance.
*/
public WaveHeader setBitsPerSample(short bitsPerSample) {
mBitsPerSample = bitsPerSample;
return this;
}
/**
* Get the size of audio data after this header, in bytes.
* @return size of audio data after this header, in bytes.
*/
public int getNumBytes() {
return mNumBytes;
}
/**
* Set the size of audio data after this header, in bytes.
* @param numBytes size of audio data after this header, in bytes.
* @return reference to this WaveHeader instance.
*/
public WaveHeader setNumBytes(int numBytes) {
mNumBytes = numBytes;
return this;
}
/**
* Read and initialize a WaveHeader.
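*
* <p>A read-side sketch, assuming a stream <code>in</code> positioned at the
* start of a wave file:</p>
*
* <pre class="prettyprint">
*
* WaveHeader hdr = new WaveHeader();
* hdr.read(in); // consumes the 44 byte header
* byte[] audio = new byte[hdr.getNumBytes()]; // audio data follows
*
* </pre>
*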
* @param in {@link java.io.InputStream} to read from.
* @return number of bytes consumed.
* @throws IOException
*/
public int read(InputStream in) throws IOException {
/* RIFF header */
readId(in, "RIFF");
int numBytes = readInt(in) - 36;
readId(in, "WAVE");
/* fmt chunk */
readId(in, "fmt ");
if (16 != readInt(in)) throw new IOException("fmt chunk length not 16");
mFormat = readShort(in);
mNumChannels = readShort(in);
mSampleRate = readInt(in);
int byteRate = readInt(in);
short blockAlign = readShort(in);
mBitsPerSample = readShort(in);
if (byteRate != mNumChannels * mSampleRate * mBitsPerSample / 8) {
throw new IOException("fmt.ByteRate field inconsistent");
}
if (blockAlign != mNumChannels * mBitsPerSample / 8) {
throw new IOException("fmt.BlockAlign field inconsistent");
}
/* data chunk */
readId(in, "data");
mNumBytes = readInt(in);
return HEADER_LENGTH;
}
private static void readId(InputStream in, String id) throws IOException {
for (int i = 0; i < id.length(); i++) {
if (id.charAt(i) != in.read()) throw new IOException(id + " tag not present");
}
}
private static int readInt(InputStream in) throws IOException {
return in.read() | (in.read() << 8) | (in.read() << 16) | (in.read() << 24);
}
private static short readShort(InputStream in) throws IOException {
return (short)(in.read() | (in.read() << 8));
}
/**
* Write a WAVE file header.
* @param out {@link java.io.OutputStream} to receive the header.
* @return number of bytes written.
* @throws IOException
*/
public int write(OutputStream out) throws IOException {
/* RIFF header */
writeId(out, "RIFF");
writeInt(out, 36 + mNumBytes);
writeId(out, "WAVE");
/* fmt chunk */
writeId(out, "fmt ");
writeInt(out, 16);
writeShort(out, mFormat);
writeShort(out, mNumChannels);
writeInt(out, mSampleRate);
writeInt(out, mNumChannels * mSampleRate * mBitsPerSample / 8);
writeShort(out, (short)(mNumChannels * mBitsPerSample / 8));
writeShort(out, mBitsPerSample);
/* data chunk */
writeId(out, "data");
writeInt(out, mNumBytes);
return HEADER_LENGTH;
}
private static void writeId(OutputStream out, String id) throws IOException {
for (int i = 0; i < id.length(); i++) out.write(id.charAt(i));
}
private static void writeInt(OutputStream out, int val) throws IOException {
out.write(val >> 0);
out.write(val >> 8);
out.write(val >> 16);
out.write(val >> 24);
}
private static void writeShort(OutputStream out, short val) throws IOException {
out.write(val >> 0);
out.write(val >> 8);
}
@Override
public String toString() {
return String.format(
"WaveHeader format=%d numChannels=%d sampleRate=%d bitsPerSample=%d numBytes=%d",
mFormat, mNumChannels, mSampleRate, mBitsPerSample, mNumBytes);
}
}

@@ -0,0 +1,6 @@
<HTML>
<BODY>
Simple, synchronous SREC speech recognition API.
@hide
</BODY>
</HTML>