PocketSphinx on Android recognises words even if they aren't spoken

PocketSphinx on Android recognises words even if they aren't spoken - android

I am using PocketSphinx for android-23.
I want to code an offline assistant for one of my apps.
I have successfully used recognizer.addKeyphraseSearch to initialize the assistant. For eg. In this case I say "Hello" to initialize it.
this is my entire code
public class Farmax_2 extends Activity implements
RecognitionListener {
/* Named searches allow to quickly reconfigure the decoder */
private static final String KWS_SEARCH = "wakeup";
private static final String ahead = "about";
private static final String PHONE_SEARCH = "ahead";
private static final String MENU_SEARCH = "menu";
TextToSpeech t1;
Button btn;
/* Keyword we are looking for to activate menu */
private static final String KEYPHRASE = "hello";
/* Used to handle permission request */
private static final int PERMISSIONS_REQUEST_RECORD_AUDIO = 1;
private SpeechRecognizer recognizer;
private HashMap<String, Integer> captions;
#Override
public void onCreate(Bundle state) {
super.onCreate(state);
setContentView(R.layout.activity_farmax_2);
// Check if user has given permission to record audio
int permissionCheck = ContextCompat.checkSelfPermission(getApplicationContext(), Manifest.permission.RECORD_AUDIO);
if (permissionCheck != PackageManager.PERMISSION_GRANTED) {
ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.RECORD_AUDIO}, PERMISSIONS_REQUEST_RECORD_AUDIO);
return;
}
btn=(Button)findViewById(R.id.buttonme);
btn.setOnClickListener(new View.OnClickListener() {
#Override
public void onClick(View v) {
Intent inte = new Intent(Farmax_2.this, MainMenu.class);
startActivity(inte);
}
});
t1 = new TextToSpeech(getApplicationContext(), new TextToSpeech.OnInitListener() {
#Override
public void onInit(int status) {
if (status != TextToSpeech.ERROR) {
t1.setLanguage(Locale.UK);
}
}
});
runRecognizerSetup();
}
public void omku(View view) { Intent in=new Intent(this,abtus.class);
startActivity(in);}
private void runRecognizerSetup() {
// Recognizer initialization is a time-consuming and it involves IO,
// so we execute it in async task
new AsyncTask<Void, Void, Exception>() {
#Override
protected Exception doInBackground(Void... params) {
try {
Assets assets = new Assets(Farmax_2.this);
File assetDir = assets.syncAssets();
setupRecognizer(assetDir);
} catch (IOException e) {
return e;
}
return null;
}
#Override
protected void onPostExecute(Exception result) {
if (result != null) {
} else {
switchSearch(KWS_SEARCH);
}
}
}.execute();
}
#Override
public void onRequestPermissionsResult(int requestCode,
String[] permissions, int[] grantResults) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults);
if (requestCode == PERMISSIONS_REQUEST_RECORD_AUDIO) {
if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
runRecognizerSetup();
} else {
finish();
}
}
}
#Override
public void onDestroy() {
super.onDestroy();
if (recognizer != null) {
recognizer.cancel();
recognizer.shutdown();
}
}
/**
* In partial result we get quick updates about current hypothesis. In
* keyword spotting mode we can react here, in other modes we need to wait
* for final result in onResult.
*/
#Override
public void onPartialResult(Hypothesis hypothesis) {
if (hypothesis == null)
return;
String text = hypothesis.getHypstr();
switch (text) {
case KEYPHRASE: {
omkar();
break;
}
case ahead: {
Intent in = new Intent(this, abtus.class);
startActivity(in);
break;
// t1.speak("taking you to privacy policy of farmax.", TextToSpeech.QUEUE_FLUSH, null);
}
case PHONE_SEARCH: {
Intent in = new Intent(this, MainMenu.class);
startActivity(in);
// t1.speak("Main Menu.", TextToSpeech.QUEUE_FLUSH, null);
break;
}
}
}
private void omkar() {
t1.speak("Yes sir.", TextToSpeech.QUEUE_FLUSH, null);
switchSearch(MENU_SEARCH);
}
/**
* This callback is called when we stop the recognizer.
*/
#Override
public void onResult(Hypothesis hypothesis) {
if (hypothesis != null) {
String text = hypothesis.getHypstr();
makeText(getApplicationContext(), text, Toast.LENGTH_SHORT).show();
}
}
#Override
public void onBeginningOfSpeech() {
}
/**
* We stop recognizer here to get a final result
*/
#Override
public void onEndOfSpeech() {
}
private void switchSearch(String searchName) {
recognizer.stop();
// If we are not spotting, start listening with timeout (10000 ms or 10 seconds).
if (searchName.equals(KWS_SEARCH))
recognizer.startListening(searchName);
else
recognizer.startListening(searchName, 10000);
}
private void setupRecognizer(File assetsDir) throws IOException {
// The recognizer can be configured to perform multiple searches
// of different kind and switch between them
recognizer = SpeechRecognizerSetup.defaultSetup()
.setAcousticModel(new File(assetsDir, "en-us-ptm"))
.setDictionary(new File(assetsDir, "cmudict-en-us.dict"))
.setRawLogDir(assetsDir) // To disable logging of raw audio comment out this call (takes a lot of space on the device)
.getRecognizer();
recognizer.addListener(this);
/** In your application you might not need to add all those searches.
* They are added here for demonstration. You can leave just one.
*/
// Create keyword-activation search.
recognizer.addKeyphraseSearch(KWS_SEARCH, "hello");
// Create grammar-based search for selection between demos
File menuGrammar = new File(assetsDir, "firstscn.gram");
recognizer.addGrammarSearch(MENU_SEARCH, menuGrammar);
// Create grammar-based search for digit recognition
}
#Override
public void onError(Exception error) {
}
#Override
public void onTimeout() {
switchSearch(KWS_SEARCH);
}
}
when I say hello, it responds correctly by replying "Yes sir" via tts. But after that it is supposed to switch menu and wait for the further commands. In this case there are two.
#Override
public void onPartialResult(Hypothesis hypothesis) {
if (hypothesis == null)
return;
String text = hypothesis.getHypstr();
switch (text) {
case KEYPHRASE: {
omkar();
break;
}
case ahead: {
Intent in = new Intent(this, abtus.class);
startActivity(in);
break;
// t1.speak("taking you to privacy policy of farmax.", TextToSpeech.QUEUE_FLUSH, null);
}
case PHONE_SEARCH: {
Intent in = new Intent(this, MainMenu.class);
startActivity(in);
// t1.speak("Main Menu.", TextToSpeech.QUEUE_FLUSH, null);
break;
}
}
}
But the problem is that It doesnt wait for my command after it switches the menu.
Sometimes a toast pops up with "about" or sometimes with "ahead" even though I dont speak them. The app freezes badly after that and leaves me no other option than to close it.
If have also tried if else statements other than switch and case. But they don't seem to help much.
I have also tried to use the above code in onResult rather than onPartialResult but that doesnt help as well.
using sphinx tools I have created my own dictionary and grammar file.
Here is the grammar file content for this case.
#JSGF V1.0;
grammar firstscn;
public <item> = about | ahead;
Where am I going wrong? Please help me.

Related

Restart SpeechRecognizer using onUtteranceProgressListener

I've been working on this code for weeks with no progress at all, even though it seems like such an easy fix!! Basically I am using TextToSpeech with SpeechRecognizer in order to have the app ask me a question and then listen for my answer. Here's my code... I'm thinking its a thread problem or something.
Basically when I hold down the button, SPeechRecognizer starts listening until i take away finger... this works fine.. but I programmatically want the SpeechRecognizer to start up again after Utterance is Completed or simulate a button press to get it to work but neither works...
public class MainActivity extends AppCompatActivity {
TextToSpeech textToSpeech;
// SpeechRecognizer
SpeechRecognizer mSpeechRecognizer;
Intent mSpeechRecognizerIntent;
// Speech Analysis Support
SpeechToTextAnalyzer speechToTextAnalyzer;
String speechToTextAnalyzerResponse;
String SPEECH_RESPONSE_BACK;
int buttonPressCounter;
boolean responsive;
TextView StatusDisplay, WordingDisplay;
Button SimulateButton;
#Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
checkPermission();
buttonPressCounter = 0;
// Initialize TextToSpeech
textToSpeech = new TextToSpeech(getApplicationContext(), new TextToSpeech.OnInitListener() {
#Override
public void onInit(int status) {
if(status != TextToSpeech.ERROR) {
textToSpeech.setLanguage(Locale.CANADA);
}
textToSpeech.setOnUtteranceProgressListener(new UtteranceProgressListener() {
#Override
public void onStart(String utteranceId) {
// Speaking started.
}
#Override
public void onDone(String utteranceId) {
// Speaking stopped.
if(responsive){ // if(utteranceId == "LISTEN_FOR_NEW_RESPONSE")
// possible method one:
SimulateButton.performClick();
// possible method two:
//mSpeechRecognizer.startListening(mSpeechRecognizerIntent);
// NOTE: SO FAR NEITHER OF THESE LINES MAKE THE SPEECHRECOGNIZER START UP WITHOUT ME TOUCHING THE BUTTON!!!
}
}
#Override
public void onError(String utteranceId) {
// There was an error.
}
});
}
});
// Initialize UI View Components
StatusDisplay = (TextView)findViewById(R.id.StatusUpdater);
WordingDisplay = (TextView)findViewById(R.id.WordsHeard);
// Create Reference to Classes
speechToTextAnalyzer = new SpeechToTextAnalyzer(this, this);
// Initialize SpeechRecognizer and Intent
final SpeechRecognizer mSpeechRecognizer = SpeechRecognizer.createSpeechRecognizer(this);
final Intent mSpeechRecognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
mSpeechRecognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
mSpeechRecognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE,
Locale.getDefault());
mSpeechRecognizerIntent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1);
mSpeechRecognizer.setRecognitionListener(new RecognitionListener() {
#Override
public void onReadyForSpeech(Bundle bundle) {
}
#Override
public void onBeginningOfSpeech() {
}
#Override
public void onRmsChanged(float v) {
}
#Override
public void onBufferReceived(byte[] bytes) {
}
#Override
public void onEndOfSpeech() {
}
#Override
public void onError(int errorCode) {
String errorMessage = getErrorText(errorCode);
//Log.d(LOG_TAG, "FAILED " + errorMessage);
//listeningOn = false; // UNSURE!!
}
// accompanying text for onError
public String getErrorText(int errorCode) {
String message;
switch (errorCode) {
case SpeechRecognizer.ERROR_AUDIO:
message = "Audio Recording Error";
break;
case SpeechRecognizer.ERROR_CLIENT:
message = "Client Side Error";
break;
case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
message = "Insufficient Permissions";
break;
case SpeechRecognizer.ERROR_NETWORK:
message = "Network Error";
break;
case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
message = "Network TimeOut";
break;
case SpeechRecognizer.ERROR_NO_MATCH:
message = "No Match";
break;
case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
message = "RecognitionService Busy";
break;
case SpeechRecognizer.ERROR_SERVER:
message = "Error From Server";
break;
case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
message = "No Speech Input";
break;
default:
message = "Couldn't Understand, Please Try Again";
break;
}
return message;
}
#Override
public void onResults(Bundle bundle) {
//getting all the matches
ArrayList<String> matches = bundle
.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
String firstMatch = "";
//displaying the first match
if (matches != null) {
for(int i = 0; i < matches.size(); i++){
firstMatch = matches.get(i); // get the last one, which has the actual words I spoke...
}
matches.clear();
}
//StatusDisplay.setText("Not Listening...");
// ENTRY POINT FOR ALL COMMAND FUNCTIONALITY
speechToTextAnalyzerResponse = speechToTextAnalyzer.AnalyzeSpeechSingleResponse(firstMatch);
buttonPressCounter = speechToTextAnalyzer.AnalyzeSpeechCounter(buttonPressCounter);
responsive = speechToTextAnalyzer.AnalyzeSpeechResponseThenListen(buttonPressCounter);
SPEECH_RESPONSE_BACK = speechToTextAnalyzerResponse;
WordingDisplay.setText(SPEECH_RESPONSE_BACK);
if(responsive){
RESPONSIVE_SPEECH("... okay now say something new ... ");
}
else {
SINGLE_RESPONSE();
}
}
#Override
public void onPartialResults(Bundle bundle) {
}
#Override
public void onEvent(int i, Bundle bundle) {
}
}); // end of mSpeechRecognizer.setRecognitionListener
SimulateButton = (Button)findViewById(R.id.DoSomething);
SimulateButton.setOnTouchListener(new View.OnTouchListener() {
#Override
public boolean onTouch(View view, MotionEvent motionEvent) {
switch (motionEvent.getAction()) {
case MotionEvent.ACTION_UP:
//when the user removed the finger
mSpeechRecognizer.stopListening();
StatusDisplay.setText("Not Listening...");
break;
case MotionEvent.ACTION_DOWN:
//finger is on the button
StatusDisplay.setText("Listening...");
mSpeechRecognizer.startListening(mSpeechRecognizerIntent);
// WHILE I HOLD BUTTON THIS ACTUALLY WORKS BUT NOT INSIDE ONUTTERANCEPROGRESSLISTENER.........
break;
}
return true;
}
});
} // end of onCreate()
public void SINGLE_RESPONSE(){
if (!textToSpeech.isSpeaking()) {
textToSpeech.speak(SPEECH_RESPONSE_BACK, TextToSpeech.QUEUE_FLUSH, null);
//textToSpeech.speak(WordingDisplay.getText().toString(), TextToSpeech.QUEUE_FLUSH, null);
} else {
textToSpeech.stop();
}
}
public void RESPONSIVE_SPEECH(String PleaseSayThis){
SPEECH_RESPONSE_BACK = PleaseSayThis;
// https://android-developers.googleblog.com/2009/09/introduction-to-text-to-speech-in.html
if (!textToSpeech.isSpeaking()) {
HashMap<String, String> stringStringHashMap = new HashMap<String, String>();
stringStringHashMap.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, "LISTEN_FOR_NEW_RESPONSE"); // when using normal text to speech, the ID is "ID_MAIN_CALL" to be searched for in onUtteranceCompleted
textToSpeech.speak(SPEECH_RESPONSE_BACK, textToSpeech.QUEUE_FLUSH, stringStringHashMap);
} else {
textToSpeech.stop();
}
}
private void checkPermission() {
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) {
if (!(ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) == PackageManager.PERMISSION_GRANTED)) {
Intent intent = new Intent(Settings.ACTION_APPLICATION_DETAILS_SETTINGS,
Uri.parse("package:" + getPackageName()));
startActivity(intent);
finish();
}
}
}
protected void onDestroy(){
if(textToSpeech != null) {
textToSpeech.stop();
textToSpeech.shutdown();
textToSpeech = null;
}
super.onDestroy();
}

According to the UtteranceProgressListener docs, "The callbacks specified in this method can be called from multiple threads."
To verify this, you can add
boolean wasCalledFromBackgroundThread = (Thread.currentThread().getId() != 1);
Log.i("XXX", "was onDone() called on a background thread? : " + wasCalledFromBackgroundThread);
to the onDone() method body.
In my experience, they are called on background threads more often than not.
So, inside your onDone() method body, I would suggest surrounding the UI manipulation with runOnUiThread() like so:
#Override
public void onDone(String utteranceId) {
runOnUiThread(new Runnable() {
#Override
public void run() {
// possible method one:
SimulateButton.performClick();
// possible method two:
//mSpeechRecognizer.startListening(mSpeechRecognizerIntent);
}
});
}

android pocketsphinx not sending results or partial results

See code below. Pocketsphinx is configured with a keyphrase search to trigger on the word "record". Searching is then started, and talking causes onBeginningOfSpeech and onEndOfSpeech to be called, but no other listener methods get called, whatever I say.
public class MainActivity extends AppCompatActivity implements RecognitionListener {
private final Handler handler = new Handler ();
private SpeechRecognizer recognizer;
private final static String KEYWORD = "record";
#Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_start);
makeButtonStartButton ();
ensureRecordAudioPermission ();
startKeywordListener ();
}
private void startKeywordListener() {
// Recognizer initialization is a time-consuming and it involves IO,
// so we execute it in async task
new AsyncTask<Void, Void, Exception>() {
#Override
protected Exception doInBackground(Void... params) {
try {
Assets assets = new Assets(MainActivity.this);
File assetDir = assets.syncAssets();
setupRecognizer(assetDir);
} catch (IOException e) {
return e;
}
return null;
}
#Override
protected void onPostExecute(Exception result) {
if (result != null) {
Log.e("MainActivity", "Failed to init recognizer " + result);
} else {
startKeywordSearch ();
}
}
}.execute();
}
private void startKeywordSearch() {
Log.i("MainActivity", "Starting keyword search: " + KEYWORD);
recognizer.stop();
recognizer.startListening(KEYWORD);
}
private void setupRecognizer(File assetsDir) throws IOException {
// The recognizer can be configured to perform multiple searches
// of different kind and switch between them
recognizer = SpeechRecognizerSetup.defaultSetup()
.setAcousticModel(new File(assetsDir, "en-us-ptm"))
.setDictionary(new File(assetsDir, "cmudict-en-us.dict"))
.setRawLogDir(assetsDir) // To disable logging of raw audio comment out this call (takes a lot of space on the device)
.getRecognizer();
recognizer.addListener(this);
// Create keyword-activation search.
recognizer.addKeyphraseSearch(KEYWORD, KEYWORD);
}
private void ensureRecordAudioPermission() {
// Check if user has given permission to record audio
int permissionCheck = ContextCompat.checkSelfPermission(getApplicationContext(), Manifest.permission.RECORD_AUDIO);
if (permissionCheck == PackageManager.PERMISSION_DENIED) {
ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.RECORD_AUDIO}, 1);
return;
}
}
#Override
public void onRequestPermissionsResult(int requestCode,
String[] permissions, int[] grantResults) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults);
if (requestCode == 1) {
if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
startKeywordListener();
} else {
finish();
}
}
}
#Override
public void onDestroy() {
super.onDestroy();
if (recognizer != null) {
recognizer.cancel();
recognizer.shutdown();
}
}
private void makeButtonStartButton() {
findViewById(R.id.startButton).setOnClickListener(startRecording);
}
private final View.OnClickListener startRecording = new View.OnClickListener() {
#Override
public void onClick(View v) {
startActivity(new Intent(MainActivity.this, RecordingActivity.class));
}
};
#Override
public void onBeginningOfSpeech() {
Log.i ("MainActivity", "Beginning of speech detected");
}
#Override
public void onEndOfSpeech() {
Log.i ("MainActivity", "End of speech detected");
}
#Override
public void onPartialResult(Hypothesis hypothesis) {
if (hypothesis == null) return; // reject the null hypothesis (!)
Log.i ("MainActivity", "Partial result: " + hypothesis.getHypstr() + " (" + hypothesis.getProb() + ")");
if (hypothesis.getHypstr().equals(KEYWORD))
startRecording.onClick(null);
}
#Override
public void onResult(Hypothesis hypothesis) {
if (hypothesis == null) return; // reject the null hypothesis (!)
Log.i ("MainActivity", "Complete result: " + hypothesis.getHypstr() + " (" + hypothesis.getProb() + ")");
if (hypothesis.getHypstr().equals(KEYWORD))
startRecording.onClick(null);
}
#Override
public void onError(Exception e) {
Log.i ("MainActivity", "Error detected", e);
}
#Override
public void onTimeout() {
Log.i ("MainActivity", "Timeout occurred");
}
}

Pocketsphinx decoder fails to initialize

I am trying to implement a hotword in my app which will start the speech input using CMUSphinx. I copied the code from the tutorial, did some changes to start the speech input but it is not working. My mainActivity code for the recognition looks something like this:
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
if (PrefManager.getString(Constant.ACCESS_TOKEN, null) == null) {
throw new IllegalStateException("Not signed in, Cannot access resource!");
}
clientBuilder = new ClientBuilder();
init();
int permissionCheck = ContextCompat.checkSelfPermission(getApplicationContext(), Manifest.permission.RECORD_AUDIO);
if (permissionCheck == PackageManager.PERMISSION_DENIED) {
ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.RECORD_AUDIO}, PERMISSIONS_REQUEST_RECORD_AUDIO);
return;
}
runRecognizerSetup();
}
private static final String KWS_SEARCH = "wakeup";
private void runRecognizerSetup() {
// Recognizer initialization is a time-consuming and it involves IO,
// so we execute it in async task
new AsyncTask<Void, Void, Exception>() {
#Override
protected Exception doInBackground(Void... params) {
try {
Assets assets = new Assets(MainActivity.this);
File assetDir = assets.syncAssets();
setupRecognizer(assetDir);
} catch (IOException e) {
return e;
}
return null;
}
#Override
protected void onPostExecute(Exception result) {
if (result != null) {
Toast.makeText(getApplicationContext(),"Failed",LENGTH_SHORT).show();
} else {
switchSearch(KWS_SEARCH);
}
}
}.execute();
}
#Override
public void onRequestPermissionsResult(int requestCode,
String[] permissions, int[] grantResults) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults);
if (requestCode == PERMISSIONS_REQUEST_RECORD_AUDIO) {
if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
runRecognizerSetup();
} else {
finish();
}
}
}
/**
* In partial result we get quick updates about current hypothesis. In
* keyword spotting mode we can react here, in other modes we need to wait
* for final result in onResult.
*/
#Override
public void onPartialResult(Hypothesis hypothesis) {
if (hypothesis == null)
return;
String text = hypothesis.getHypstr();
if (text.equals(KEYPHRASE))
promptSpeechInput();
}
/**
* This callback is called when we stop the recognizer.
*/
#Override
public void onResult(Hypothesis hypothesis){
if (hypothesis != null) {
String pqr = hypothesis.getHypstr();
Toast.makeText(getApplicationContext(),pqr,LENGTH_SHORT).show();
}
}
#Override
public void onBeginningOfSpeech() {
}
/**
* We stop recognizer here to get a final result
*/
#Override
public void onEndOfSpeech() {
if (!recognizer.getSearchName().equals(KWS_SEARCH))
switchSearch(KWS_SEARCH);
}
private void switchSearch(String searchName) {
recognizer.stop();
// If we are not spotting, start listening with timeout (10000 ms or 10 seconds).
if (searchName.equals(KWS_SEARCH))
recognizer.startListening(searchName);
else
recognizer.startListening(searchName, 10000);
}
private void setupRecognizer(File assetsDir) throws IOException {
// The recognizer can be configured to perform multiple searches
// of different kind and switch between them
recognizer = SpeechRecognizerSetup.defaultSetup()
.setAcousticModel(new File(assetsDir, "en-us-ptm"))
.setDictionary(new File(assetsDir, "cmudict-en-us.dict"))
.setRawLogDir(assetsDir) // To disable logging of raw audio comment out this call (takes a lot of space on the device)
.getRecognizer();
recognizer.addListener(this);
/** In your application you might not need to add all those searches.
* They are added here for demonstration. You can leave just one.
*/
// Create keyword-activation search.
recognizer.addKeyphraseSearch(KWS_SEARCH, KEYPHRASE);
}
#Override
public void onError(Exception error) {
Toast.makeText(getApplicationContext(),"error",LENGTH_SHORT).show();
}
#Override
public void onTimeout() {
switchSearch(KWS_SEARCH);
}
My keyphrase is "HI". The problem is that whenever I run this code I get a toast message "failed" due to this:
Toast.makeText(getApplicationContext(),"Failed",LENGTH_SHORT).show();
What can be possibly wrong with this?
I am very new to these things so any help in this will be really helpful.
Thank you.

Implement pocketsphinx on android for word translator

I am currently developing a word translator with speech-to-text function. I studied pocketsphinx but I can only let it search the dictionary by declaring a default keyword. Is there a way that you will speak first before it will search the dictionary? I am stucked here. Any help will be appreciated. Thankyou! Here's the code:
/* Named searches allow to quickly reconfigure the decoder */
private static final String KWS_SEARCH = "wakeup";
private static final String FORECAST_SEARCH = "forecast";
private static final String DIGITS_SEARCH = "digits";
private static final String PHONE_SEARCH = "phones";
private static final String MENU_SEARCH = "menu";
/* Keyword we are looking for to activate menu */
private static final String KEYPHRASE = "";
/* Used to handle permission request */
private static final int PERMISSIONS_REQUEST_RECORD_AUDIO = 1;
private SpeechRecognizer recognizer;
private HashMap<String, Integer> captions;
private void runRecognizerSetup() {
// Recognizer initialization is a time-consuming and it involves IO,
// so we execute it in async task
new AsyncTask<Void, Void, Exception>() {
#Override
protected Exception doInBackground(Void... params) {
try {
Assets assets = new Assets(TranslateActivity.this);
File assetDir = assets.syncAssets();
setupRecognizer(assetDir);
} catch (IOException e) {
return e;
}
return null;
}
#Override
protected void onPostExecute(Exception result) {
if (result != null) {
Toast.makeText(TranslateActivity.this,"Failed to initialize recognizer " + result, Toast.LENGTH_SHORT).show();
popupWindow.dismiss();
} else {
lblPrepare.setText("VOICE RECOGNITION READY");
lblSpeak.setVisibility(View.VISIBLE);
recognizer.startListening(KWS_SEARCH);
}
}
}.execute();
}
#Override
public void onRequestPermissionsResult(int requestCode,
String[] permissions, int[] grantResults) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults);
if (requestCode == PERMISSIONS_REQUEST_RECORD_AUDIO) {
if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
runRecognizerSetup();
} else {
finish();
}
}
}
#Override
public void onDestroy() {
super.onDestroy();
if (recognizer != null) {
recognizer.cancel();
recognizer.shutdown();
}
}
/**
* In partial result we get quick updates about current hypothesis. In
* keyword spotting mode we can react here, in other modes we need to wait
* for final result in onResult.
*/
#Override
public void onPartialResult(Hypothesis hypothesis) {
if (hypothesis == null)
return;
String text = hypothesis.getHypstr();
if(text.equals(""))
{
}else
{
Toast.makeText(TranslateActivity.this, text, Toast.LENGTH_SHORT).show();
}
/*if (text.equals(KEYPHRASE)){
recognizer.cancel();
// <- You have to implement this
recognizer.startListening(KWS_SEARCH);
}/*
switchSearch(MENU_SEARCH);
else if (text.equals(DIGITS_SEARCH))
switchSearch(DIGITS_SEARCH);
else if (text.equals(PHONE_SEARCH))
switchSearch(PHONE_SEARCH);
else if (text.equals(FORECAST_SEARCH))
switchSearch(FORECAST_SEARCH);
else
Toast.makeText(TranslateActivity.this,text, Toast.LENGTH_SHORT).show();*/
}
/**
* This callback is called when we stop the recognizer.
*/
#Override
public void onResult(Hypothesis hypothesis) {
/*
if (hypothesis != null) {
String text = hypothesis.getHypstr();
makeText(getApplicationContext(), text, Toast.LENGTH_SHORT).show();
}*/
if (hypothesis!=null)
{
popupWindow.dismiss();
performAction(hypothesis);
}
}
#Override
public void onBeginningOfSpeech() {
}
/**
* We stop recognizer here to get a final result
*/
#Override
public void onEndOfSpeech() {
/*if (!recognizer.getSearchName().equals(KWS_SEARCH))
switchSearch(KWS_SEARCH);*/
recognizer.stop();
popupWindow.dismiss();
}
/*private void switchSearch(String searchName) {
recognizer.stop();
// If we are not spotting, start listening with timeout (10000 ms or 10 seconds).
if (searchName.equals(KWS_SEARCH))
recognizer.startListening(searchName);
else
recognizer.startListening(searchName);
Toast.makeText(TranslateActivity.this,searchName, Toast.LENGTH_SHORT).show();
}*/
private void setupRecognizer(File assetsDir) throws IOException {
// The recognizer can be configured to perform multiple searches
// of different kind and switch between them
recognizer = SpeechRecognizerSetup.defaultSetup()
.setAcousticModel(new File(assetsDir, "en-us-ptm"))
.setDictionary(new File(assetsDir, "cmudict-en-us.dict"))
.setRawLogDir(assetsDir) // To disable logging of raw audio comment out this call (takes a lot of space on the device)
.setKeywordThreshold(1e-45f) // Threshold to tune for keyphrase to balance between false alarms and misses
.setBoolean("-allphone_ci", true) // Use context-independent phonetic search, context-dependent is too slow for mobile
.getRecognizer();
recognizer.addListener(this);
/** In your application you might not need to add all those searches.
* They are added here for demonstration. You can leave just one.
*/
// Create keyword-activation search.
recognizer.addKeyphraseSearch(KWS_SEARCH, KEYPHRASE);
// Create grammar-based search for selection between demos
/* File menuGrammar = new File(assetsDir, "menu.gram");
recognizer.addGrammarSearch(MENU_SEARCH, menuGrammar);
// Create grammar-based search for digit recognition
File digitsGrammar = new File(assetsDir, "digits.gram");
recognizer.addGrammarSearch(DIGITS_SEARCH, digitsGrammar);
// Create language model search
File languageModel = new File(assetsDir, "weather.dmp");
recognizer.addNgramSearch(FORECAST_SEARCH, languageModel);
// Phonetic search
File phoneticModel = new File(assetsDir, "en-phone.dmp");
recognizer.addAllphoneSearch(PHONE_SEARCH, phoneticModel);*/
}
#Override
public void onError(Exception error) {
Toast.makeText(TranslateActivity.this,"Failed: "+error.getMessage(), Toast.LENGTH_SHORT).show();
}
#Override
public void onTimeout() {
/* switchSearch(KWS_SEARCH);*/
}
public void performAction(Hypothesis hypothesis) {
String strWord = hypothesis.getHypstr();
txtOriginal.setText(strWord);
if(recognizer.getDecoder().lookupWord("abc") == null) {
// do something
}
Toast.makeText(TranslateActivity.this, strWord, Toast.LENGTH_SHORT).show();
}

Handling Errors in PocketSphinx Android app

I am using the default dictionary that comes with the pocketsphinx demo which is good for my purposes. When a user enters a phrase, the app starts a keyphrase listening but if the word is not found in the dictionary the app crashes. The app crashes onError() within a service. How is the error handling done? is there any way I can catch the error? Overall I would just like the service to call stopSelf() when an error happens so the main activity won't crash as well.
Errors:
ERROR: "kws_search.c", line 165: The word 'phonez' is missing in the dictionary
Fatal signal 11 (SIGSEGV) at 0x00000000 (code=1), thread 5389 (1994.wherephone)
Here is my service class:
public class WherePhoneService extends Service implements RecognitionListener {
private static String SettingStorage = "SavedData";
SharedPreferences settingData;
private SpeechRecognizer recognizer;
private String sInput;
private String sOutput;
private int seekVal;
private TextToSpeech reply;
private AsyncTask t;
public WherePhoneService() {
}
#Override
public IBinder onBind(Intent intent) {
// TODO: Return the communication channel to the service.
throw new UnsupportedOperationException("Not yet implemented");
}
#Override
public int onStartCommand(Intent intent, int flags, int startId) {
makeText(getApplicationContext(), "onHandle start", Toast.LENGTH_SHORT).show();
getValues();
startTTS();
t = new AsyncTask<Void, Void, Exception>() {
#Override
protected Exception doInBackground(Void... params) {
try {
Assets assets = new Assets(WherePhoneService.this);
File assetDir = assets.syncAssets();
setupRecognizer(assetDir);
} catch (IOException e) {
return e;
}
return null;
}
#Override
protected void onPostExecute(Exception result) {
if (result != null) {
//((TextView) findViewById(R.id.caption_text)).setText("Failed to init recognizer " + result);
} else {
switchSearch(sInput);
}
}
}.execute();
return Service.START_STICKY;
}
private void setupRecognizer(File assetsDir) throws IOException {
recognizer = defaultSetup()
.setAcousticModel(new File(assetsDir, "en-us-ptm"))
.setDictionary(new File(assetsDir, "cmudict-en-us.dict"))
// To disable logging of raw audio comment out this call (takes a lot of space on the device)
//.setRawLogDir(assetsDir)
// Threshold to tune for keyphrase to balance between false alarms and misses
.setKeywordThreshold(1e-45f)
// Use context-independent phonetic search, context-dependent is too slow for mobile
.setBoolean("-allphone_ci", true)
.getRecognizer();
recognizer.addListener(this);
// Create keyword-activation search.
recognizer.addKeyphraseSearch(sInput, sInput);
}
private void switchSearch(String searchName) {
recognizer.stop();
// If we are not spotting, start listening with timeout (10000 ms or 10 seconds).
if (searchName.equals(sInput))
recognizer.startListening(searchName);
else
recognizer.startListening(searchName, 10000);
}
#Override
public void onBeginningOfSpeech() {
}
#Override
public void onEndOfSpeech() {
if (!recognizer.getSearchName().equals(sInput))
switchSearch(sInput);
}
#Override
public void onPartialResult(Hypothesis hypothesis) {
if (hypothesis == null)
return;
String text = hypothesis.getHypstr();
makeText(getApplicationContext(), "Partial", Toast.LENGTH_SHORT).show();
if (text.equals(sInput)) {
setVolume();
// Text to speech
reply.speak(sOutput, TextToSpeech.QUEUE_ADD, null);
switchSearch(sInput);
}
else {
makeText(getApplicationContext(), "Try again", Toast.LENGTH_SHORT).show();
switchSearch(sInput);
}
}
#Override
public void onResult(Hypothesis hypothesis) {
if (hypothesis != null) {
// restart listener and affirm that partial has past
makeText(getApplicationContext(), "end", Toast.LENGTH_SHORT).show();
//recognizer.startListening(sInput);
switchSearch(sInput);
}
}
public void onError(Exception e) {
e.printStackTrace(); // not all Android versions will print the stack trace automatically
Intent intent = new Intent ();
intent.setAction ("com.mydomain.SEND_LOG"); // see step 5.
intent.setFlags (Intent.FLAG_ACTIVITY_NEW_TASK); // required when starting from Application
startActivity (intent);
stopSelf();
}
#Override
public void onTimeout() {
switchSearch(sInput);
}
public void startTTS() {
reply = new TextToSpeech(getApplicationContext(), new TextToSpeech.OnInitListener() {
#Override
public void onInit(int status) {
if(status != TextToSpeech.ERROR){
reply.setLanguage(Locale.UK);
}
}
});
}
public void getValues() {
settingData = getBaseContext().getSharedPreferences(SettingStorage, 0);
sInput = settingData.getString("inputstring", "Where is my phone").toString().toLowerCase().replaceAll("[^\\w\\s]", "");
sOutput = settingData.getString("outputstring", "").toString().toLowerCase();
seekVal = settingData.getInt("seekval", 0);
}
public void setVolume() {
int seekValConvert = 0;
AudioManager audioManager = (AudioManager) getSystemService(Context.AUDIO_SERVICE);
int getMaxPhoneVol = audioManager.getStreamMaxVolume(audioManager.STREAM_MUSIC);
seekValConvert = ((seekVal * getMaxPhoneVol)/100);
audioManager.setStreamVolume(audioManager.STREAM_MUSIC, seekValConvert, 0);
}
#Override
public void onDestroy() {
super.onDestroy();
makeText(getApplicationContext(), "destroy", Toast.LENGTH_SHORT).show();
recognizer.cancel();
recognizer.shutdown();
t.cancel(true);
}
}

Crash is a bug in pocketsphinx-android. If you update to latest version from github, it should properly throw RuntimeException on any errors in methods addKeyphrase and setSearch.

Develop Reference

The Android operating system is a mobile operating system that was developed by Google (GOOGL?) to be primarily used for touchscreen devices, cell phones, and tablets.

PocketSphinx on Android recognises words even if they aren't spoken - android

Related

Restart SpeechRecognizer using onUtteranceProgressListener

android pocketsphinx not sending results or partial results

Pocketsphinx decoder fails to initialize

Implement pocketsphinx on android for word translator

Handling Errors in PocketSphinx Android app

Categories

Resources