Merge "Delete android.speech.srec.*" into mnc-dev
This commit is contained in:
committed by
Android (Google) Code Review
commit
2c89e7d8db
@@ -1,110 +0,0 @@
|
||||
/*---------------------------------------------------------------------------*
|
||||
* MicrophoneInputStream.java *
|
||||
* *
|
||||
 * Copyright 2007 Nuance Communications, Inc.                                 *
|
||||
* *
|
||||
* Licensed under the Apache License, Version 2.0 (the 'License'); *
|
||||
* you may not use this file except in compliance with the License. *
|
||||
* *
|
||||
* You may obtain a copy of the License at *
|
||||
* http://www.apache.org/licenses/LICENSE-2.0 *
|
||||
* *
|
||||
* Unless required by applicable law or agreed to in writing, software *
|
||||
* distributed under the License is distributed on an 'AS IS' BASIS, *
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
|
||||
* See the License for the specific language governing permissions and *
|
||||
* limitations under the License. *
|
||||
* *
|
||||
*---------------------------------------------------------------------------*/
|
||||
|
||||
|
||||
package android.speech.srec;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.lang.IllegalStateException;
|
||||
|
||||
|
||||
/**
|
||||
* PCM input stream from the microphone, 16 bits per sample.
|
||||
*/
|
||||
public final class MicrophoneInputStream extends InputStream {
    static {
        System.loadLibrary("srec_jni");
    }

    private final static String TAG = "MicrophoneInputStream";

    // Opaque native handle to the AudioRecord object; 0 means not open / closed.
    private long mAudioRecord = 0;
    // Scratch buffer reused by the single-byte read() path to avoid per-call allocation.
    private byte[] mOneByte = new byte[1];

    /**
     * MicrophoneInputStream constructor.
     * @param sampleRate sample rate of the microphone, typically 11025 or 8000.
     * @param fifoDepth depth of the real time fifo, measured in sampleRate clock ticks.
     * This determines how long an application may delay before losing data.
     * @throws IOException if the native AudioRecord cannot be created or started.
     */
    public MicrophoneInputStream(int sampleRate, int fifoDepth) throws IOException {
        mAudioRecord = AudioRecordNew(sampleRate, fifoDepth);
        if (mAudioRecord == 0) throw new IOException("AudioRecord constructor failed - busy?");
        int status = AudioRecordStart(mAudioRecord);
        if (status != 0) {
            // Release the half-initialized recorder before reporting failure.
            close();
            throw new IOException("AudioRecord start failed: " + status);
        }
    }

    /**
     * Reads one byte (half of a 16-bit sample) from the microphone.
     * @return the byte as an unsigned value 0-255, or -1 if no byte was read.
     * @throws IOException on a native read error.
     */
    @Override
    public int read() throws IOException {
        if (mAudioRecord == 0) throw new IllegalStateException("not open");
        int rtn = AudioRecordRead(mAudioRecord, mOneByte, 0, 1);
        // Widen the signed byte to its unsigned value; -1 signals end of stream.
        return rtn == 1 ? ((int)mOneByte[0] & 0xff) : -1;
    }

    /**
     * Fills as much of b as possible with audio data.
     * @param b buffer to receive PCM bytes.
     * @return number of bytes actually read.
     * @throws IOException on a native read error.
     */
    @Override
    public int read(byte[] b) throws IOException {
        if (mAudioRecord == 0) throw new IllegalStateException("not open");
        return AudioRecordRead(mAudioRecord, b, 0, b.length);
    }

    /**
     * Reads up to length bytes of audio data into b starting at offset.
     * @param b buffer to receive PCM bytes.
     * @param offset index of the first byte to write.
     * @param length maximum number of bytes to read.
     * @return number of bytes actually read.
     * @throws IOException on a native read error.
     */
    @Override
    public int read(byte[] b, int offset, int length) throws IOException {
        if (mAudioRecord == 0) throw new IllegalStateException("not open");
        // TODO: should we force all reads to be a multiple of the sample size?
        return AudioRecordRead(mAudioRecord, b, offset, length);
    }

    /**
     * Closes this stream.  Stops and deletes the native AudioRecord; the nested
     * try/finally guarantees the handle is deleted and zeroed even if stop throws.
     */
    @Override
    public void close() throws IOException {
        if (mAudioRecord != 0) {
            try {
                AudioRecordStop(mAudioRecord);
            } finally {
                try {
                    AudioRecordDelete(mAudioRecord);
                } finally {
                    mAudioRecord = 0;
                }
            }
        }
    }

    /**
     * Safety net: reclaims the native recorder if the caller forgot to close(),
     * and complains loudly so the leak gets fixed.  Always chains to
     * super.finalize() in a finally block, as required by the Object.finalize
     * contract.
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            if (mAudioRecord != 0) {
                close();
                throw new IOException("someone forgot to close MicrophoneInputStream");
            }
        } finally {
            super.finalize();
        }
    }

    //
    // AudioRecord JNI interface
    //
    private static native long AudioRecordNew(int sampleRate, int fifoDepth);
    private static native int AudioRecordStart(long audioRecord);
    private static native int AudioRecordRead(long audioRecord, byte[] b, int offset, int length) throws IOException;
    private static native void AudioRecordStop(long audioRecord) throws IOException;
    private static native void AudioRecordDelete(long audioRecord) throws IOException;
}
|
||||
@@ -1,716 +0,0 @@
|
||||
/*
|
||||
* ---------------------------------------------------------------------------
|
||||
* Recognizer.java
|
||||
*
|
||||
 * Copyright 2007 Nuance Communications, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the 'License'); you may not
|
||||
* use this file except in compliance with the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*
|
||||
* ---------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
package android.speech.srec;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
* Simple, synchronous speech recognizer, using the Nuance SREC package.
|
||||
 * Usage proceeds as follows:
|
||||
*
|
||||
* <ul>
|
||||
* <li>Create a <code>Recognizer</code>.
|
||||
* <li>Create a <code>Recognizer.Grammar</code>.
|
||||
* <li>Setup the <code>Recognizer.Grammar</code>.
|
||||
* <li>Reset the <code>Recognizer.Grammar</code> slots, if needed.
|
||||
* <li>Fill the <code>Recognizer.Grammar</code> slots, if needed.
|
||||
* <li>Compile the <code>Recognizer.Grammar</code>, if needed.
|
||||
* <li>Save the filled <code>Recognizer.Grammar</code>, if needed.
|
||||
* <li>Start the <code>Recognizer</code>.
|
||||
* <li>Loop over <code>advance</code> and <code>putAudio</code> until recognition complete.
|
||||
* <li>Fetch and process results, or notify of failure.
|
||||
* <li>Stop the <code>Recognizer</code>.
|
||||
* <li>Destroy the <code>Recognizer</code>.
|
||||
* </ul>
|
||||
*
|
||||
* <p>Below is example code</p>
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
*
|
||||
* // create and start audio input
|
||||
* InputStream audio = new MicrophoneInputStream(11025, 11025*5);
|
||||
* // create a Recognizer
|
||||
* String cdir = Recognizer.getConfigDir(null);
|
||||
* Recognizer recognizer = new Recognizer(cdir + "/baseline11k.par");
|
||||
* // create and load a Grammar
|
||||
* Recognizer.Grammar grammar = recognizer.new Grammar(cdir + "/grammars/VoiceDialer.g2g");
|
||||
* // setup the Grammar to work with the Recognizer
|
||||
* grammar.setupRecognizer();
|
||||
* // fill the Grammar slots with names and save, if required
|
||||
* grammar.resetAllSlots();
|
||||
* for (String name : names) grammar.addWordToSlot("@Names", name, null, 1, "V=1");
|
||||
* grammar.compile();
|
||||
* grammar.save(".../foo.g2g");
|
||||
* // start the Recognizer
|
||||
* recognizer.start();
|
||||
* // loop over Recognizer events
|
||||
* while (true) {
|
||||
* switch (recognizer.advance()) {
|
||||
* case Recognizer.EVENT_INCOMPLETE:
|
||||
* case Recognizer.EVENT_STARTED:
|
||||
* case Recognizer.EVENT_START_OF_VOICING:
|
||||
* case Recognizer.EVENT_END_OF_VOICING:
|
||||
* // let the Recognizer continue to run
|
||||
* continue;
|
||||
* case Recognizer.EVENT_RECOGNITION_RESULT:
|
||||
* // success, so fetch results here!
|
||||
* for (int i = 0; i < recognizer.getResultCount(); i++) {
|
||||
* String result = recognizer.getResult(i, Recognizer.KEY_LITERAL);
|
||||
* }
|
||||
* break;
|
||||
* case Recognizer.EVENT_NEED_MORE_AUDIO:
|
||||
* // put more audio in the Recognizer
|
||||
* recognizer.putAudio(audio);
|
||||
* continue;
|
||||
* default:
|
||||
* notifyFailure();
|
||||
* break;
|
||||
* }
|
||||
* break;
|
||||
* }
|
||||
* // stop the Recognizer
|
||||
* recognizer.stop();
|
||||
* // destroy the Recognizer
|
||||
* recognizer.destroy();
|
||||
* // stop the audio device
|
||||
* audio.close();
|
||||
*
|
||||
* </pre>
|
||||
*/
|
||||
public final class Recognizer {
    static {
        System.loadLibrary("srec_jni");
    }

    // Log tag; made final for consistency with MicrophoneInputStream, it is never reassigned.
    private static final String TAG = "Recognizer";

    /**
     * Result key corresponding to confidence score.
     */
    public static final String KEY_CONFIDENCE = "conf";

    /**
     * Result key corresponding to literal text.
     */
    public static final String KEY_LITERAL = "literal";

    /**
     * Result key corresponding to semantic meaning text.
     */
    public static final String KEY_MEANING = "meaning";

    // handle to SR_Vocabulary object; 0 means destroyed / not loaded
    private long mVocabulary = 0;

    // handle to SR_Recognizer object; 0 means destroyed / not created
    private long mRecognizer = 0;

    // Grammar currently associated with Recognizer via SR_GrammarSetupRecognizer
    private Grammar mActiveGrammar = null;

    /**
     * Get the pathname of the SREC configuration directory corresponding to the
     * language indicated by the Locale.
     * This directory contains dictionaries, speech models,
     * configuration files, and other data needed by the Recognizer.
     * @param locale <code>Locale</code> corresponding to the desired language,
     * or null for default, currently <code>Locale.US</code>.
     * @return Pathname of the configuration directory, or null if it does not exist.
     */
    public static String getConfigDir(Locale locale) {
        if (locale == null) locale = Locale.US;
        String dir = "/system/usr/srec/config/" +
                locale.toString().replace('_', '.').toLowerCase(Locale.ROOT);
        if ((new File(dir)).isDirectory()) return dir;
        return null;
    }

    /**
     * Create an instance of a SREC speech recognizer.
     *
     * @param configFile pathname of the baseline*.par configuration file,
     * which in turn contains references to dictionaries, speech models,
     * and other data needed to configure and operate the recognizer.
     * A separate config file is needed for each audio sample rate.
     * Two files, baseline11k.par and baseline8k.par, which correspond to
     * 11025 and 8000 hz, are present in the directory indicated by
     * {@link #getConfigDir}.
     * @throws IOException
     */
    public Recognizer(String configFile) throws IOException {
        PMemInit();
        SR_SessionCreate(configFile);
        mRecognizer = SR_RecognizerCreate();
        SR_RecognizerSetup(mRecognizer);
        mVocabulary = SR_VocabularyLoad();
    }

    /**
     * Represents a grammar loaded into the Recognizer.
     */
    public class Grammar {
        // handle to the native SR_Grammar object; 0 means destroyed
        private long mGrammar = 0;

        /**
         * Create a <code>Grammar</code> instance.
         * @param g2gFileName pathname of g2g file.
         * @throws IOException if the grammar cannot be loaded.
         */
        public Grammar(String g2gFileName) throws IOException {
            mGrammar = SR_GrammarLoad(g2gFileName);
            SR_GrammarSetupVocabulary(mGrammar, mVocabulary);
        }

        /**
         * Reset all slots.
         */
        public void resetAllSlots() {
            SR_GrammarResetAllSlots(mGrammar);
        }

        /**
         * Add a word to a slot.
         *
         * @param slot slot name.
         * @param word word to insert.
         * @param pron pronunciation, or null to derive from word.
         * @param weight weight to give the word.  One is normal, 50 is low.
         * @param tag semantic meaning tag string.
         */
        public void addWordToSlot(String slot, String word, String pron, int weight, String tag) {
            SR_GrammarAddWordToSlot(mGrammar, slot, word, pron, weight, tag);
        }

        /**
         * Compile all slots.
         */
        public void compile() {
            SR_GrammarCompile(mGrammar);
        }

        /**
         * Setup <code>Grammar</code> with <code>Recognizer</code>.
         */
        public void setupRecognizer() {
            SR_GrammarSetupRecognizer(mGrammar, mRecognizer);
            mActiveGrammar = this;
        }

        /**
         * Save <code>Grammar</code> to g2g file.
         *
         * @param g2gFileName
         * @throws IOException
         */
        public void save(String g2gFileName) throws IOException {
            SR_GrammarSave(mGrammar, g2gFileName);
        }

        /**
         * Release resources associated with this <code>Grammar</code>.
         */
        public void destroy() {
            // TODO: need to do cleanup and disassociation with Recognizer
            if (mGrammar != 0) {
                SR_GrammarDestroy(mGrammar);
                mGrammar = 0;
            }
        }

        /**
         * Safety net: reclaims the native grammar if the caller forgot to
         * destroy(), and complains loudly so the leak gets fixed.  Always
         * chains to super.finalize() in a finally block, as required by the
         * Object.finalize contract.
         */
        protected void finalize() throws Throwable {
            try {
                if (mGrammar != 0) {
                    destroy();
                    throw new IllegalStateException("someone forgot to destroy Grammar");
                }
            } finally {
                super.finalize();
            }
        }
    }

    /**
     * Start recognition
     */
    public void start() {
        // TODO: shouldn't be here?
        SR_RecognizerActivateRule(mRecognizer, mActiveGrammar.mGrammar, "trash", 1);
        SR_RecognizerStart(mRecognizer);
    }

    /**
     * Process some audio and return the current status.
     * @return recognition event, one of:
     * <ul>
     * <li><code>EVENT_INVALID</code>
     * <li><code>EVENT_NO_MATCH</code>
     * <li><code>EVENT_INCOMPLETE</code>
     * <li><code>EVENT_STARTED</code>
     * <li><code>EVENT_STOPPED</code>
     * <li><code>EVENT_START_OF_VOICING</code>
     * <li><code>EVENT_END_OF_VOICING</code>
     * <li><code>EVENT_SPOKE_TOO_SOON</code>
     * <li><code>EVENT_RECOGNITION_RESULT</code>
     * <li><code>EVENT_START_OF_UTTERANCE_TIMEOUT</code>
     * <li><code>EVENT_RECOGNITION_TIMEOUT</code>
     * <li><code>EVENT_NEED_MORE_AUDIO</code>
     * <li><code>EVENT_MAX_SPEECH</code>
     * </ul>
     */
    public int advance() {
        return SR_RecognizerAdvance(mRecognizer);
    }

    /**
     * Put audio samples into the <code>Recognizer</code>.
     * @param buf holds the audio samples.
     * @param offset offset of the first sample.
     * @param length number of bytes containing samples.
     * @param isLast indicates no more audio data, normally false.
     * @return number of bytes accepted.
     */
    public int putAudio(byte[] buf, int offset, int length, boolean isLast) {
        return SR_RecognizerPutAudio(mRecognizer, buf, offset, length, isLast);
    }

    /**
     * Read audio samples from an <code>InputStream</code> and put them in the
     * <code>Recognizer</code>.
     * @param audio <code>InputStream</code> containing PCM audio samples.
     * @throws IOException if the Recognizer does not accept all bytes read.
     */
    public void putAudio(InputStream audio) throws IOException {
        // make sure the audio buffer is allocated
        if (mPutAudioBuffer == null) mPutAudioBuffer = new byte[512];
        // read some data
        int nbytes = audio.read(mPutAudioBuffer);
        // eof, so signal Recognizer
        if (nbytes == -1) {
            SR_RecognizerPutAudio(mRecognizer, mPutAudioBuffer, 0, 0, true);
        }
        // put it into the Recognizer
        else if (nbytes != SR_RecognizerPutAudio(mRecognizer, mPutAudioBuffer, 0, nbytes, false)) {
            throw new IOException("SR_RecognizerPutAudio failed nbytes=" + nbytes);
        }
    }

    // audio buffer for putAudio(InputStream); lazily allocated on first use
    private byte[] mPutAudioBuffer = null;

    /**
     * Get the number of recognition results.  Must be called after
     * <code>EVENT_RECOGNITION_RESULT</code> is returned by
     * <code>advance</code>, but before <code>stop</code>.
     *
     * @return number of results in nbest list.
     */
    public int getResultCount() {
        return SR_RecognizerResultGetSize(mRecognizer);
    }

    /**
     * Get a set of keys for the result.  Must be called after
     * <code>EVENT_RECOGNITION_RESULT</code> is returned by
     * <code>advance</code>, but before <code>stop</code>.
     *
     * @param index index of result.
     * @return array of keys.
     */
    public String[] getResultKeys(int index) {
        return SR_RecognizerResultGetKeyList(mRecognizer, index);
    }

    /**
     * Get a result value.  Must be called after
     * <code>EVENT_RECOGNITION_RESULT</code> is returned by
     * <code>advance</code>, but before <code>stop</code>.
     *
     * @param index index of the result.
     * @param key key of the result.  This is typically one of
     * <code>KEY_CONFIDENCE</code>, <code>KEY_LITERAL</code>, or
     * <code>KEY_MEANING</code>, but the user can also define their own keys
     * in a grxml file, or in the <code>tag</code> slot of
     * <code>Grammar.addWordToSlot</code>.
     * @return the result.
     */
    public String getResult(int index, String key) {
        return SR_RecognizerResultGetValue(mRecognizer, index, key);
    }

    /**
     * Stop the <code>Recognizer</code>.
     */
    public void stop() {
        SR_RecognizerStop(mRecognizer);
        SR_RecognizerDeactivateRule(mRecognizer, mActiveGrammar.mGrammar, "trash");
    }

    /**
     * Reset the acoustic state vector to its default value.
     *
     * @hide
     */
    public void resetAcousticState() {
        SR_AcousticStateReset(mRecognizer);
    }

    /**
     * Set the acoustic state vector.
     * @param state String containing the acoustic state vector.
     *
     * @hide
     */
    public void setAcousticState(String state) {
        SR_AcousticStateSet(mRecognizer, state);
    }

    /**
     * Get the acoustic state vector.
     * @return String containing the acoustic state vector.
     *
     * @hide
     */
    public String getAcousticState() {
        return SR_AcousticStateGet(mRecognizer);
    }

    /**
     * Clean up resources.  Each native teardown call is wrapped in nested
     * try/finally blocks so that later teardown steps still run, and handles
     * are zeroed, even if an earlier step throws.
     */
    public void destroy() {
        try {
            if (mVocabulary != 0) SR_VocabularyDestroy(mVocabulary);
        } finally {
            mVocabulary = 0;
            try {
                if (mRecognizer != 0) SR_RecognizerUnsetup(mRecognizer);
            } finally {
                try {
                    if (mRecognizer != 0) SR_RecognizerDestroy(mRecognizer);
                } finally {
                    mRecognizer = 0;
                    try {
                        SR_SessionDestroy();
                    } finally {
                        PMemShutdown();
                    }
                }
            }
        }
    }

    /**
     * Safety net: reclaims native resources if the caller forgot to destroy(),
     * and complains loudly so the leak gets fixed.  Always chains to
     * super.finalize() in a finally block, as required by the Object.finalize
     * contract.
     */
    protected void finalize() throws Throwable {
        try {
            if (mVocabulary != 0 || mRecognizer != 0) {
                destroy();
                throw new IllegalStateException("someone forgot to destroy Recognizer");
            }
        } finally {
            super.finalize();
        }
    }

    /* an example session captured, for reference
    void doall() {
        if (PMemInit ( )
            || lhs_audioinOpen ( WAVE_MAPPER, SREC_TEST_DEFAULT_AUDIO_FREQUENCY, &audio_in_handle )
            || srec_test_init_application_data ( &applicationData, argc, argv )
            || SR_SessionCreate ( "/system/usr/srec/config/en.us/baseline11k.par" )
            || SR_RecognizerCreate ( &applicationData.recognizer )
            || SR_RecognizerSetup ( applicationData.recognizer)
            || ESR_SessionGetLCHAR ( L("cmdline.vocabulary"), filename, &flen )
            || SR_VocabularyLoad ( filename, &applicationData.vocabulary )
            || SR_VocabularyGetLanguage ( applicationData.vocabulary, &applicationData.locale )
            || (applicationData.nametag = NULL)
            || SR_NametagsCreate ( &applicationData.nametags )
            || (LSTRCPY ( applicationData.grammars [0].grammar_path, "/system/usr/srec/config/en.us/grammars/VoiceDialer.g2g" ), 0)
            || (LSTRCPY ( applicationData.grammars [0].grammarID, "BothTags" ), 0)
            || (LSTRCPY ( applicationData.grammars [0].ruleName, "trash" ), 0)
            || (applicationData.grammars [0].is_ve_grammar = ESR_FALSE, 0)
            || SR_GrammarLoad (applicationData.grammars [0].grammar_path, &applicationData.grammars [applicationData.grammarCount].grammar )
            || SR_GrammarSetupVocabulary ( applicationData.grammars [0].grammar, applicationData.vocabulary )
            || SR_GrammarSetupRecognizer( applicationData.grammars [0].grammar, applicationData.recognizer )
            || SR_GrammarSetDispatchFunction ( applicationData.grammars [0].grammar, L("myDSMCallback"), NULL, myDSMCallback )
            || (applicationData.grammarCount++, 0)
            || SR_RecognizerActivateRule ( applicationData.recognizer, applicationData.grammars [0].grammar,
                            applicationData.grammars [0].ruleName, 1 )
            || (applicationData.active_grammar_num = 0, 0)
            || lhs_audioinStart ( audio_in_handle )
            || SR_RecognizerStart ( applicationData.recognizer )
            || strl ( applicationData.grammars [0].grammar, &applicationData, audio_in_handle, &recognition_count )
            || SR_RecognizerStop ( applicationData.recognizer )
            || lhs_audioinStop ( audio_in_handle )
            || SR_RecognizerDeactivateRule ( applicationData.recognizer, applicationData.grammars [0].grammar, applicationData.grammars [0].ruleName )
            || (applicationData.active_grammar_num = -1, 0)
            || SR_GrammarDestroy ( applicationData.grammars [0].grammar )
            || (applicationData.grammarCount--, 0)
            || SR_NametagsDestroy ( applicationData.nametags )
            || (applicationData.nametags = NULL, 0)
            || SR_VocabularyDestroy ( applicationData.vocabulary )
            || (applicationData.vocabulary = NULL)
            || SR_RecognizerUnsetup ( applicationData.recognizer) // releases acoustic models
            || SR_RecognizerDestroy ( applicationData.recognizer )
            || (applicationData.recognizer = NULL)
            || SR_SessionDestroy ( )
            || srec_test_shutdown_application_data ( &applicationData )
            || lhs_audioinClose ( &audio_in_handle )
            || PMemShutdown ( )
    }
    */

    //
    // PMem native methods
    //
    private static native void PMemInit();
    private static native void PMemShutdown();

    //
    // SR_Session native methods
    //
    private static native void SR_SessionCreate(String filename);
    private static native void SR_SessionDestroy();

    //
    // SR_Recognizer native methods
    //

    /**
     * Reserved value.
     */
    public final static int EVENT_INVALID = 0;

    /**
     * <code>Recognizer</code> could not find a match for the utterance.
     */
    public final static int EVENT_NO_MATCH = 1;

    /**
     * <code>Recognizer</code> processed one frame of audio.
     */
    public final static int EVENT_INCOMPLETE = 2;

    /**
     * <code>Recognizer</code> has just been started.
     */
    public final static int EVENT_STARTED = 3;

    /**
     * <code>Recognizer</code> is stopped.
     */
    public final static int EVENT_STOPPED = 4;

    /**
     * Beginning of speech detected.
     */
    public final static int EVENT_START_OF_VOICING = 5;

    /**
     * End of speech detected.
     */
    public final static int EVENT_END_OF_VOICING = 6;

    /**
     * Beginning of utterance occurred too soon.
     */
    public final static int EVENT_SPOKE_TOO_SOON = 7;

    /**
     * Recognition match detected.
     */
    public final static int EVENT_RECOGNITION_RESULT = 8;

    /**
     * Timeout occurred before beginning of utterance.
     */
    public final static int EVENT_START_OF_UTTERANCE_TIMEOUT = 9;

    /**
     * Timeout occurred before speech recognition could complete.
     */
    public final static int EVENT_RECOGNITION_TIMEOUT = 10;

    /**
     * Not enough samples to process one frame.
     */
    public final static int EVENT_NEED_MORE_AUDIO = 11;

    /**
     * More audio encountered than is allowed by 'swirec_max_speech_duration'.
     */
    public final static int EVENT_MAX_SPEECH = 12;

    /**
     * Produce a displayable string from an <code>advance</code> event.
     * @param event
     * @return String representing the event.
     */
    public static String eventToString(int event) {
        switch (event) {
            case EVENT_INVALID:
                return "EVENT_INVALID";
            case EVENT_NO_MATCH:
                return "EVENT_NO_MATCH";
            case EVENT_INCOMPLETE:
                return "EVENT_INCOMPLETE";
            case EVENT_STARTED:
                return "EVENT_STARTED";
            case EVENT_STOPPED:
                return "EVENT_STOPPED";
            case EVENT_START_OF_VOICING:
                return "EVENT_START_OF_VOICING";
            case EVENT_END_OF_VOICING:
                return "EVENT_END_OF_VOICING";
            case EVENT_SPOKE_TOO_SOON:
                return "EVENT_SPOKE_TOO_SOON";
            case EVENT_RECOGNITION_RESULT:
                return "EVENT_RECOGNITION_RESULT";
            case EVENT_START_OF_UTTERANCE_TIMEOUT:
                return "EVENT_START_OF_UTTERANCE_TIMEOUT";
            case EVENT_RECOGNITION_TIMEOUT:
                return "EVENT_RECOGNITION_TIMEOUT";
            case EVENT_NEED_MORE_AUDIO:
                return "EVENT_NEED_MORE_AUDIO";
            case EVENT_MAX_SPEECH:
                return "EVENT_MAX_SPEECH";
        }
        // Unknown event codes still produce a readable string.
        return "EVENT_" + event;
    }

    //
    // SR_Recognizer methods
    //
    private static native void SR_RecognizerStart(long recognizer);
    private static native void SR_RecognizerStop(long recognizer);
    private static native long SR_RecognizerCreate();
    private static native void SR_RecognizerDestroy(long recognizer);
    private static native void SR_RecognizerSetup(long recognizer);
    private static native void SR_RecognizerUnsetup(long recognizer);
    private static native boolean SR_RecognizerIsSetup(long recognizer);
    private static native String SR_RecognizerGetParameter(long recognizer, String key);
    private static native int SR_RecognizerGetSize_tParameter(long recognizer, String key);
    private static native boolean SR_RecognizerGetBoolParameter(long recognizer, String key);
    private static native void SR_RecognizerSetParameter(long recognizer, String key, String value);
    private static native void SR_RecognizerSetSize_tParameter(long recognizer,
            String key, int value);
    private static native void SR_RecognizerSetBoolParameter(long recognizer, String key,
            boolean value);
    private static native void SR_RecognizerSetupRule(long recognizer, long grammar,
            String ruleName);
    private static native boolean SR_RecognizerHasSetupRules(long recognizer);
    private static native void SR_RecognizerActivateRule(long recognizer, long grammar,
            String ruleName, int weight);
    private static native void SR_RecognizerDeactivateRule(long recognizer, long grammar,
            String ruleName);
    private static native void SR_RecognizerDeactivateAllRules(long recognizer);
    private static native boolean SR_RecognizerIsActiveRule(long recognizer, long grammar,
            String ruleName);
    private static native boolean SR_RecognizerCheckGrammarConsistency(long recognizer,
            long grammar);
    private static native int SR_RecognizerPutAudio(long recognizer, byte[] buffer, int offset,
            int length, boolean isLast);
    private static native int SR_RecognizerAdvance(long recognizer);
    // private static native void SR_RecognizerLoadUtterance(long recognizer,
    //         const LCHAR* filename);
    // private static native void SR_RecognizerLoadWaveFile(long recognizer,
    //         const LCHAR* filename);
    // private static native void SR_RecognizerSetLockFunction(long recognizer,
    //         SR_RecognizerLockFunction function, void* data);
    private static native boolean SR_RecognizerIsSignalClipping(long recognizer);
    private static native boolean SR_RecognizerIsSignalDCOffset(long recognizer);
    private static native boolean SR_RecognizerIsSignalNoisy(long recognizer);
    private static native boolean SR_RecognizerIsSignalTooQuiet(long recognizer);
    private static native boolean SR_RecognizerIsSignalTooFewSamples(long recognizer);
    private static native boolean SR_RecognizerIsSignalTooManySamples(long recognizer);
    // private static native void SR_Recognizer_Change_Sample_Rate (size_t new_sample_rate);

    //
    // SR_AcousticState native methods
    //
    private static native void SR_AcousticStateReset(long recognizer);
    private static native void SR_AcousticStateSet(long recognizer, String state);
    private static native String SR_AcousticStateGet(long recognizer);

    //
    // SR_Grammar native methods
    //
    private static native void SR_GrammarCompile(long grammar);
    private static native void SR_GrammarAddWordToSlot(long grammar, String slot,
            String word, String pronunciation, int weight, String tag);
    private static native void SR_GrammarResetAllSlots(long grammar);
    // private static native void SR_GrammarAddNametagToSlot(long grammar, String slot,
    //         const struct SR_Nametag_t* nametag, int weight, String tag);
    private static native void SR_GrammarSetupVocabulary(long grammar, long vocabulary);
    // private static native void SR_GrammarSetupModels(long grammar, SR_AcousticModels* models);
    private static native void SR_GrammarSetupRecognizer(long grammar, long recognizer);
    private static native void SR_GrammarUnsetupRecognizer(long grammar);
    // private static native void SR_GrammarGetModels(long grammar,SR_AcousticModels** models);
    private static native long SR_GrammarCreate();
    private static native void SR_GrammarDestroy(long grammar);
    private static native long SR_GrammarLoad(String filename);
    private static native void SR_GrammarSave(long grammar, String filename);
    // private static native void SR_GrammarSetDispatchFunction(long grammar,
    //         const LCHAR* name, void* userData, SR_GrammarDispatchFunction function);
    // private static native void SR_GrammarSetParameter(long grammar, const
    //         LCHAR* key, void* value);
    // private static native void SR_GrammarSetSize_tParameter(long grammar,
    //         const LCHAR* key, size_t value);
    // private static native void SR_GrammarGetParameter(long grammar, const
    //         LCHAR* key, void** value);
    // private static native void SR_GrammarGetSize_tParameter(long grammar,
    //         const LCHAR* key, size_t* value);
    // private static native void SR_GrammarCheckParse(long grammar, const LCHAR*
    //         transcription, SR_SemanticResult** result, size_t* resultCount);
    private static native void SR_GrammarAllowOnly(long grammar, String transcription);
    private static native void SR_GrammarAllowAll(long grammar);

    //
    // SR_Vocabulary native methods
    //
    // private static native int SR_VocabularyCreate();
    private static native long SR_VocabularyLoad();
    // private static native void SR_VocabularySave(SR_Vocabulary* self,
    //         const LCHAR* filename);
    // private static native void SR_VocabularyAddWord(SR_Vocabulary* self,
    //         const LCHAR* word);
    // private static native void SR_VocabularyGetLanguage(SR_Vocabulary* self,
    //         ESR_Locale* locale);
    private static native void SR_VocabularyDestroy(long vocabulary);
    private static native String SR_VocabularyGetPronunciation(long vocabulary, String word);

    //
    // SR_RecognizerResult native methods
    //
    private static native byte[] SR_RecognizerResultGetWaveform(long recognizer);
    private static native int SR_RecognizerResultGetSize(long recognizer);
    private static native int SR_RecognizerResultGetKeyCount(long recognizer, int nbest);
    private static native String[] SR_RecognizerResultGetKeyList(long recognizer, int nbest);
    private static native String SR_RecognizerResultGetValue(long recognizer,
            int nbest, String key);
    // private static native void SR_RecognizerResultGetLocale(long recognizer, ESR_Locale* locale);
}
|
||||
@@ -1,187 +0,0 @@
|
||||
/*
|
||||
* ---------------------------------------------------------------------------
|
||||
* UlawEncoderInputStream.java
|
||||
*
|
||||
 * Copyright 2008 Nuance Communications, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the 'License'); you may not
|
||||
* use this file except in compliance with the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*
|
||||
* ---------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
package android.speech.srec;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
|
||||
* InputStream which transforms 16 bit pcm data to ulaw data.
|
||||
*
|
||||
* Not yet ready to be supported, so
|
||||
* @hide
|
||||
*/
|
||||
public final class UlawEncoderInputStream extends InputStream {
    private final static String TAG = "UlawEncoderInputStream";

    /** Largest scaled pcm magnitude handled by the u-law table in encode(). */
    private final static int MAX_ULAW = 8192;
    /** Number of fractional bits in the fixed-point scaling coefficient. */
    private final static int SCALE_BITS = 16;

    /** Source of 16 bit little-endian pcm data; null once closed. */
    private InputStream mIn;

    /** Pcm magnitude mapped to the largest u-law value; <= 0 selects MAX_ULAW. */
    private final int mMax;

    /** Staging buffer of raw pcm bytes read but not yet encoded. */
    private final byte[] mBuf = new byte[1024];
    private int mBufCount = 0; // pending bytes in mBuf; 0 or 1 between reads

    /** Scratch buffer for the single-byte read() overload. */
    private final byte[] mOneByte = new byte[1];


    /**
     * Encode 16 bit signed little-endian pcm samples into 8 bit u-law bytes.
     *
     * @param pcmBuf array containing 16 bit pcm samples as little-endian byte pairs.
     * @param pcmOffset offset of the first pcm byte to encode.
     * @param ulawBuf array receiving one u-law byte per pcm sample.
     * @param ulawOffset offset of the first u-law byte written.
     * @param length number of samples to encode (consumes 2 * length pcm bytes).
     * @param max pcm magnitude that maps to full-scale u-law output;
     *            values <= 0 select the default full scale of MAX_ULAW.
     */
    public static void encode(byte[] pcmBuf, int pcmOffset,
            byte[] ulawBuf, int ulawOffset, int length, int max) {

        // Segment breakpoints, from 'ulaw' in wikipedia:
        // +8191 to +8159                            0x80
        // +8158 to +4063 in 16 intervals of 256     0x80 + interval number
        // +4062 to +2015 in 16 intervals of 128     0x90 + interval number
        // +2014 to +991  in 16 intervals of  64     0xA0 + interval number
        // +990  to +479  in 16 intervals of  32     0xB0 + interval number
        // +478  to +223  in 16 intervals of  16     0xC0 + interval number
        // +222  to +95   in 16 intervals of   8     0xD0 + interval number
        // +94   to +31   in 16 intervals of   4     0xE0 + interval number
        // +30   to +1    in 15 intervals of   2     0xF0 + interval number
        // 0                                         0xFF
        // ... mirrored for negative values, 0x7F down to 0x00.

        // coef is a SCALE_BITS fixed-point multiplier that maps 'max' onto
        // MAX_ULAW full scale.
        if (max <= 0) max = MAX_ULAW;
        int coef = MAX_ULAW * (1 << SCALE_BITS) / max;

        for (int i = 0; i < length; i++) {
            // assemble a little-endian 16 bit sample, then scale it
            int pcm = (0xff & pcmBuf[pcmOffset++]) + (pcmBuf[pcmOffset++] << 8);
            pcm = (pcm * coef) >> SCALE_BITS;

            int ulaw;
            if (pcm >= 0) {
                ulaw = pcm <= 0 ? 0xff :
                        pcm <= 30 ? 0xf0 + ((  30 - pcm) >> 1) :
                        pcm <= 94 ? 0xe0 + ((  94 - pcm) >> 2) :
                        pcm <= 222 ? 0xd0 + (( 222 - pcm) >> 3) :
                        pcm <= 478 ? 0xc0 + (( 478 - pcm) >> 4) :
                        pcm <= 990 ? 0xb0 + (( 990 - pcm) >> 5) :
                        pcm <= 2014 ? 0xa0 + ((2014 - pcm) >> 6) :
                        pcm <= 4062 ? 0x90 + ((4062 - pcm) >> 7) :
                        pcm <= 8158 ? 0x80 + ((8158 - pcm) >> 8) :
                        0x80;
            } else {
                ulaw = -1 <= pcm ? 0x7f :
                        -31 <= pcm ? 0x70 + ((pcm - -31) >> 1) :
                        -95 <= pcm ? 0x60 + ((pcm - -95) >> 2) :
                        -223 <= pcm ? 0x50 + ((pcm - -223) >> 3) :
                        -479 <= pcm ? 0x40 + ((pcm - -479) >> 4) :
                        -991 <= pcm ? 0x30 + ((pcm - -991) >> 5) :
                        -2015 <= pcm ? 0x20 + ((pcm - -2015) >> 6) :
                        -4063 <= pcm ? 0x10 + ((pcm - -4063) >> 7) :
                        -8159 <= pcm ? 0x00 + ((pcm - -8159) >> 8) :
                        0x00;
            }
            ulawBuf[ulawOffset++] = (byte)ulaw;
        }
    }

    /**
     * Compute the maximum of the absolute value of the pcm samples.
     * The return value can be used to set ulaw encoder scaling.
     * @param pcmBuf array containing 16 bit pcm data.
     * @param offset offset of start of 16 bit pcm data.
     * @param length number of pcm samples (not number of input bytes)
     * @return maximum abs of pcm data values
     */
    public static int maxAbsPcm(byte[] pcmBuf, int offset, int length) {
        int max = 0;
        for (int i = 0; i < length; i++) {
            int pcm = (0xff & pcmBuf[offset++]) + (pcmBuf[offset++] << 8);
            if (pcm < 0) pcm = -pcm;
            if (pcm > max) max = pcm;
        }
        return max;
    }

    /**
     * Create an InputStream which takes 16 bit pcm data and produces ulaw data.
     * @param in InputStream containing 16 bit pcm data.
     * @param max pcm value corresponding to maximum ulaw value.
     */
    public UlawEncoderInputStream(InputStream in, int max) {
        mIn = in;
        mMax = max;
    }

    /**
     * Read up to 'length' u-law bytes, blocking until at least one full pcm
     * sample (two bytes) is available or the underlying stream ends.
     * @return number of u-law bytes produced, or -1 at end of stream.
     * @throws IOException propagated from the underlying stream.
     * @throws IllegalStateException if the stream has been closed.
     */
    @Override
    public int read(byte[] buf, int offset, int length) throws IOException {
        if (mIn == null) throw new IllegalStateException("not open");

        // Per the InputStream contract a zero-length request reads nothing and
        // returns 0.  Without this check the fill loop below would spin
        // forever, since mIn.read(..., 0) returns 0 and mBufCount never grows.
        if (length == 0) return 0;

        // return at least one byte, but try to fill 'length'
        while (mBufCount < 2) {
            int n = mIn.read(mBuf, mBufCount, Math.min(length * 2, mBuf.length - mBufCount));
            if (n == -1) return -1; // EOF; a trailing odd byte, if any, is dropped
            mBufCount += n;
        }

        // compand data
        int n = Math.min(mBufCount / 2, length);
        encode(mBuf, 0, buf, offset, n, mMax);

        // move any leftover (odd) byte to the bottom of mBuf
        mBufCount -= n * 2;
        for (int i = 0; i < mBufCount; i++) mBuf[i] = mBuf[i + n * 2];

        return n;
    }

    @Override
    public int read(byte[] buf) throws IOException {
        return read(buf, 0, buf.length);
    }

    @Override
    public int read() throws IOException {
        int n = read(mOneByte, 0, 1);
        if (n == -1) return -1;
        return 0xff & (int)mOneByte[0];
    }

    /** Close the underlying stream; subsequent reads throw IllegalStateException. */
    @Override
    public void close() throws IOException {
        if (mIn != null) {
            InputStream in = mIn;
            mIn = null; // mark closed before close() so a failure still marks us closed
            in.close();
        }
    }

    /**
     * @return number of u-law bytes readable without blocking (one per pcm
     *         sample pair available).
     * @throws IOException if the stream has been closed (previously this
     *         threw NullPointerException).
     */
    @Override
    public int available() throws IOException {
        if (mIn == null) throw new IOException("not open");
        return (mIn.available() + mBufCount) / 2;
    }
}
|
||||
@@ -1,276 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package android.speech.srec;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
|
||||
/**
|
||||
* This class represents the header of a WAVE format audio file, which usually
|
||||
* have a .wav suffix. The following integer valued fields are contained:
|
||||
* <ul>
|
||||
* <li> format - usually PCM, ALAW or ULAW.
|
||||
* <li> numChannels - 1 for mono, 2 for stereo.
|
||||
* <li> sampleRate - usually 8000, 11025, 16000, 22050, or 44100 hz.
|
||||
* <li> bitsPerSample - usually 16 for PCM, 8 for ALAW, or 8 for ULAW.
|
||||
* <li> numBytes - size of audio data after this header, in bytes.
|
||||
* </ul>
|
||||
*
|
||||
* Not yet ready to be supported, so
|
||||
* @hide
|
||||
*/
|
||||
public class WaveHeader {

    // follows WAVE format in http://ccrma.stanford.edu/courses/422/projects/WaveFormat

    private static final String TAG = "WaveHeader";

    /** Size in bytes of the canonical 44-byte RIFF/WAVE header. */
    private static final int HEADER_LENGTH = 44;

    /** Indicates PCM format. */
    public static final short FORMAT_PCM = 1;
    /** Indicates ALAW format. */
    public static final short FORMAT_ALAW = 6;
    /** Indicates ULAW format. */
    public static final short FORMAT_ULAW = 7;

    private short mFormat;
    private short mNumChannels;
    private int mSampleRate;
    private short mBitsPerSample;
    private int mNumBytes;

    /**
     * Construct a WaveHeader, with all fields defaulting to zero.
     */
    public WaveHeader() {
    }

    /**
     * Construct a WaveHeader, with fields initialized.
     * @param format format of audio data,
     * one of {@link #FORMAT_PCM}, {@link #FORMAT_ULAW}, or {@link #FORMAT_ALAW}.
     * @param numChannels 1 for mono, 2 for stereo.
     * @param sampleRate typically 8000, 11025, 16000, 22050, or 44100 hz.
     * @param bitsPerSample usually 16 for PCM, 8 for ULAW or 8 for ALAW.
     * @param numBytes size of audio data after this header, in bytes.
     */
    public WaveHeader(short format, short numChannels, int sampleRate, short bitsPerSample, int numBytes) {
        mFormat = format;
        mSampleRate = sampleRate;
        mNumChannels = numChannels;
        mBitsPerSample = bitsPerSample;
        mNumBytes = numBytes;
    }

    /**
     * Get the format field.
     * @return format field,
     * one of {@link #FORMAT_PCM}, {@link #FORMAT_ULAW}, or {@link #FORMAT_ALAW}.
     */
    public short getFormat() {
        return mFormat;
    }

    /**
     * Set the format field.
     * @param format
     * one of {@link #FORMAT_PCM}, {@link #FORMAT_ULAW}, or {@link #FORMAT_ALAW}.
     * @return reference to this WaveHeader instance.
     */
    public WaveHeader setFormat(short format) {
        mFormat = format;
        return this;
    }

    /**
     * Get the number of channels.
     * @return number of channels, 1 for mono, 2 for stereo.
     */
    public short getNumChannels() {
        return mNumChannels;
    }

    /**
     * Set the number of channels.
     * @param numChannels 1 for mono, 2 for stereo.
     * @return reference to this WaveHeader instance.
     */
    public WaveHeader setNumChannels(short numChannels) {
        mNumChannels = numChannels;
        return this;
    }

    /**
     * Get the sample rate.
     * @return sample rate, typically 8000, 11025, 16000, 22050, or 44100 hz.
     */
    public int getSampleRate() {
        return mSampleRate;
    }

    /**
     * Set the sample rate.
     * @param sampleRate sample rate, typically 8000, 11025, 16000, 22050, or 44100 hz.
     * @return reference to this WaveHeader instance.
     */
    public WaveHeader setSampleRate(int sampleRate) {
        mSampleRate = sampleRate;
        return this;
    }

    /**
     * Get the number of bits per sample.
     * @return number of bits per sample,
     * usually 16 for PCM, 8 for ULAW or 8 for ALAW.
     */
    public short getBitsPerSample() {
        return mBitsPerSample;
    }

    /**
     * Set the number of bits per sample.
     * @param bitsPerSample number of bits per sample,
     * usually 16 for PCM, 8 for ULAW or 8 for ALAW.
     * @return reference to this WaveHeader instance.
     */
    public WaveHeader setBitsPerSample(short bitsPerSample) {
        mBitsPerSample = bitsPerSample;
        return this;
    }

    /**
     * Get the size of audio data after this header, in bytes.
     * @return size of audio data after this header, in bytes.
     */
    public int getNumBytes() {
        return mNumBytes;
    }

    /**
     * Set the size of audio data after this header, in bytes.
     * @param numBytes size of audio data after this header, in bytes.
     * @return reference to this WaveHeader instance.
     */
    public WaveHeader setNumBytes(int numBytes) {
        mNumBytes = numBytes;
        return this;
    }

    /**
     * Read and initialize a WaveHeader.
     * @param in {@link java.io.InputStream} to read from.
     * @return number of bytes consumed.
     * @throws IOException if a chunk tag is missing or the fmt chunk is inconsistent.
     */
    public int read(InputStream in) throws IOException {
        /* RIFF header */
        readId(in, "RIFF");
        // RIFF chunk size (file size - 8); consumed but not validated, since
        // the authoritative audio-data size comes from the data chunk below.
        readInt(in);
        readId(in, "WAVE");

        /* fmt chunk */
        readId(in, "fmt ");
        if (16 != readInt(in)) throw new IOException("fmt chunk length not 16");
        mFormat = readShort(in);
        mNumChannels = readShort(in);
        mSampleRate = readInt(in);
        int byteRate = readInt(in);
        short blockAlign = readShort(in);
        mBitsPerSample = readShort(in);
        // Cross-check the derived fmt fields against the primary ones.
        if (byteRate != mNumChannels * mSampleRate * mBitsPerSample / 8) {
            throw new IOException("fmt.ByteRate field inconsistent");
        }
        if (blockAlign != mNumChannels * mBitsPerSample / 8) {
            throw new IOException("fmt.BlockAlign field inconsistent");
        }

        /* data chunk */
        readId(in, "data");
        mNumBytes = readInt(in);

        return HEADER_LENGTH;
    }

    // Consumes id.length() bytes and verifies they spell 'id'.  A premature
    // EOF (read() == -1) also fails the comparison and raises IOException.
    private static void readId(InputStream in, String id) throws IOException {
        for (int i = 0; i < id.length(); i++) {
            if (id.charAt(i) != in.read()) throw new IOException( id + " tag not present");
        }
    }

    // Reads a little-endian 32 bit int.  NOTE(review): EOF is not detected
    // here; the -1 bytes fold into the result - callers rely on the
    // consistency checks above to catch truncated headers.
    private static int readInt(InputStream in) throws IOException {
        return in.read() | (in.read() << 8) | (in.read() << 16) | (in.read() << 24);
    }

    // Reads a little-endian 16 bit short (same EOF caveat as readInt).
    private static short readShort(InputStream in) throws IOException {
        return (short)(in.read() | (in.read() << 8));
    }

    /**
     * Write a WAVE file header.
     * @param out {@link java.io.OutputStream} to receive the header.
     * @return number of bytes written.
     * @throws IOException propagated from the output stream.
     */
    public int write(OutputStream out) throws IOException {
        /* RIFF header */
        writeId(out, "RIFF");
        writeInt(out, 36 + mNumBytes); // 36 = header length minus the 8-byte RIFF preamble
        writeId(out, "WAVE");

        /* fmt chunk */
        writeId(out, "fmt ");
        writeInt(out, 16);
        writeShort(out, mFormat);
        writeShort(out, mNumChannels);
        writeInt(out, mSampleRate);
        writeInt(out, mNumChannels * mSampleRate * mBitsPerSample / 8);   // ByteRate
        writeShort(out, (short)(mNumChannels * mBitsPerSample / 8));      // BlockAlign
        writeShort(out, mBitsPerSample);

        /* data chunk */
        writeId(out, "data");
        writeInt(out, mNumBytes);

        return HEADER_LENGTH;
    }

    // Writes the ASCII characters of 'id', one byte each.
    private static void writeId(OutputStream out, String id) throws IOException {
        for (int i = 0; i < id.length(); i++) out.write(id.charAt(i));
    }

    // Writes a little-endian 32 bit int (OutputStream.write uses the low 8 bits).
    private static void writeInt(OutputStream out, int val) throws IOException {
        out.write(val >> 0);
        out.write(val >> 8);
        out.write(val >> 16);
        out.write(val >> 24);
    }

    // Writes a little-endian 16 bit short.
    private static void writeShort(OutputStream out, short val) throws IOException {
        out.write(val >> 0);
        out.write(val >> 8);
    }

    @Override
    public String toString() {
        return String.format(
                "WaveHeader format=%d numChannels=%d sampleRate=%d bitsPerSample=%d numBytes=%d",
                mFormat, mNumChannels, mSampleRate, mBitsPerSample, mNumBytes);
    }

}
|
||||
@@ -1,6 +0,0 @@
|
||||
<HTML>
|
||||
<BODY>
|
||||
Simple, synchronous SREC speech recognition API.
|
||||
@hide
|
||||
</BODY>
|
||||
</HTML>
|
||||
Reference in New Issue
Block a user