Flutter Voice Assistant Localization: Build Multilingual Alexa and Google Assistant Apps
Voice-first interfaces are reshaping how users interact with apps. Building Flutter apps that integrate with voice assistants like Google Assistant and Alexa requires special attention to localization. This guide covers everything from speech recognition to multilingual voice responses.
Why Voice Localization Matters
Voice interfaces present unique localization challenges:
- Speech patterns vary by region (accents, dialects, speaking pace)
- Wake words differ across languages
- Natural language understanding requires locale-specific training
- Voice responses must sound natural in each language
- Fallback behaviors need cultural adaptation
Setting Up Voice Recognition
Google Speech-to-Text Integration
import 'package:speech_to_text/speech_to_text.dart';
class VoiceLocalizationService {
  final SpeechToText _speech = SpeechToText();

  /// Initializes the speech recognizer and, when the device supports a
  /// locale compatible with [locale], starts listening in it.
  Future<void> initializeSpeech(Locale locale) async {
    bool available = await _speech.initialize(
      onStatus: (status) => print('Status: $status'),
      onError: (error) => print('Error: $error'),
    );
    if (available) {
      // Get locales supported by the device.
      List<LocaleName> locales = await _speech.locales();
      // Find the closest supported locale to the user's preference.
      LocaleName? matchingLocale = _findBestMatch(locale, locales);
      if (matchingLocale != null) {
        await _startListening(matchingLocale.localeId);
      }
    }
  }

  /// Returns the best available recognizer locale for [target]: an exact
  /// language+country match first (e.g. 'en_US'), then any locale of the
  /// same language, or null when nothing matches.
  LocaleName? _findBestMatch(Locale target, List<LocaleName> available) {
    // Only try the exact form when a country code is present; interpolating
    // a null countryCode would produce the literal id 'en_null', which can
    // never match a real device locale.
    if (target.countryCode != null) {
      String exactId = '${target.languageCode}_${target.countryCode}';
      LocaleName? exact = available.cast<LocaleName?>().firstWhere(
        (l) => l?.localeId == exactId,
        orElse: () => null,
      );
      if (exact != null) return exact;
    }
    // Fall back to language only. Require the language code to be the whole
    // id or to be followed by a separator, so e.g. 'es' cannot accidentally
    // match an unrelated id that merely starts with the same letters.
    return available.cast<LocaleName?>().firstWhere(
      (l) =>
          l != null &&
          (l.localeId == target.languageCode ||
              l.localeId.startsWith('${target.languageCode}_') ||
              l.localeId.startsWith('${target.languageCode}-')),
      orElse: () => null,
    );
  }

  /// Starts a dictation-mode listening session for [localeId].
  Future<void> _startListening(String localeId) async {
    await _speech.listen(
      onResult: (result) => _handleResult(result),
      localeId: localeId,
      listenMode: ListenMode.dictation,
    );
  }

  /// Forwards final (non-partial) recognition results to command handling.
  void _handleResult(SpeechRecognitionResult result) {
    if (result.finalResult) {
      print('Recognized: ${result.recognizedWords}');
      _processVoiceCommand(result.recognizedWords);
    }
  }

  void _processVoiceCommand(String command) {
    // Process the localized voice command
  }
}
Locale-Aware Voice Commands
/// Maps localized trigger phrases to app intents and extracts the free-text
/// parameter that follows a trigger.
class VoiceCommandHandler {
  // languageCode -> intent -> trigger phrases (all lowercase).
  final Map<String, Map<String, List<String>>> _commands = {
    'en': {
      'search': ['search for', 'find', 'look up', 'search'],
      'navigate': ['go to', 'open', 'navigate to', 'show me'],
      'play': ['play', 'start', 'begin'],
      'stop': ['stop', 'pause', 'halt', 'end'],
    },
    'es': {
      'search': ['buscar', 'encuentra', 'busca'],
      'navigate': ['ir a', 'abrir', 'navegar a', 'muéstrame'],
      'play': ['reproducir', 'iniciar', 'comenzar'],
      'stop': ['detener', 'pausar', 'parar', 'terminar'],
    },
    'de': {
      'search': ['suche nach', 'finde', 'suchen'],
      'navigate': ['gehe zu', 'öffne', 'navigiere zu', 'zeig mir'],
      'play': ['spielen', 'starten', 'beginnen'],
      'stop': ['stoppen', 'pausieren', 'anhalten', 'beenden'],
    },
    'fr': {
      'search': ['rechercher', 'chercher', 'trouver'],
      'navigate': ['aller à', 'ouvrir', 'naviguer vers', 'montre-moi'],
      'play': ['jouer', 'démarrer', 'commencer'],
      'stop': ['arrêter', 'pause', 'stopper', 'terminer'],
    },
  };

  /// Returns the intent whose trigger phrase occurs in [utterance],
  /// or null when nothing matches. Unknown languages fall back to English.
  String? detectIntent(String utterance, String languageCode) {
    final commands = _commands[languageCode] ?? _commands['en']!;
    final lowerUtterance = utterance.toLowerCase();
    for (final entry in commands.entries) {
      for (final trigger in entry.value) {
        if (lowerUtterance.contains(trigger)) {
          return entry.key;
        }
      }
    }
    return null;
  }

  /// Strips the first matching trigger phrase for [intent] from [utterance]
  /// and returns the remaining (lowercased, trimmed) parameter text.
  String extractParameter(String utterance, String intent, String languageCode) {
    final commands = _commands[languageCode] ?? _commands['en']!;
    final triggers = commands[intent] ?? [];
    final lower = utterance.toLowerCase();
    // Remove only the first trigger that matches, then stop. Stripping every
    // trigger in turn (the previous behavior) could mangle the parameter
    // itself — e.g. removing 'find' from inside 'finding nemo'.
    for (final trigger in triggers) {
      if (lower.contains(trigger)) {
        return lower.replaceFirst(trigger, '').trim();
      }
    }
    return lower.trim();
  }
}
Text-to-Speech Localization
Configuring TTS for Multiple Languages
import 'package:flutter_tts/flutter_tts.dart';
/// Text-to-speech wrapper that selects the best available voice per
/// language and tunes the speech rate to the language being spoken.
class LocalizedTTS {
  final FlutterTts _tts = FlutterTts();
  // languageCode -> best voice found for that language.
  final Map<String, TtsVoiceConfig> _voiceConfigs = {};

  /// Enumerates the device's voices, keeping the highest-quality voice per
  /// language, then applies default rate/volume settings.
  Future<void> initialize() async {
    // Get available voices
    List<dynamic> voices = await _tts.getVoices;
    // Keep the best voice per language. The previous first-wins strategy
    // computed a quality score but never used it, so a plain voice early in
    // the list could shadow a neural voice that appeared later.
    for (var voice in voices) {
      String locale = voice['locale'] ?? '';
      String langCode = locale.split('-').first.split('_').first;
      int quality = _getVoiceQuality(voice);
      TtsVoiceConfig? existing = _voiceConfigs[langCode];
      if (existing == null || quality > existing.quality) {
        _voiceConfigs[langCode] = TtsVoiceConfig(
          locale: locale,
          name: voice['name'],
          quality: quality,
        );
      }
    }
    // Set default configuration
    await _tts.setSpeechRate(0.5);
    await _tts.setVolume(1.0);
  }

  /// Ranks a voice map by name: 3 for neural/wavenet, 2 for enhanced,
  /// 1 otherwise.
  int _getVoiceQuality(Map<dynamic, dynamic> voice) {
    // Prefer neural/enhanced voices
    String name = (voice['name'] ?? '').toLowerCase();
    if (name.contains('neural') || name.contains('wavenet')) return 3;
    if (name.contains('enhanced')) return 2;
    return 1;
  }

  /// Speaks [text] using the best voice registered for [locale]'s language,
  /// falling back to the platform default for the full language tag.
  Future<void> speak(String text, Locale locale) async {
    String langCode = locale.languageCode;
    // Find best voice for locale
    TtsVoiceConfig? config = _voiceConfigs[langCode];
    if (config != null) {
      await _tts.setLanguage(config.locale);
      if (config.name != null) {
        await _tts.setVoice({'name': config.name, 'locale': config.locale});
      }
    } else {
      // Fallback to system default
      await _tts.setLanguage(locale.toLanguageTag());
    }
    // Adjust speech rate based on language characteristics
    await _tts.setSpeechRate(_getSpeechRate(langCode));
    await _tts.speak(text);
  }

  /// Returns a language-tuned speech rate (values chosen empirically).
  double _getSpeechRate(String langCode) {
    switch (langCode) {
      case 'ja':
      case 'zh':
      case 'ko':
        return 0.4; // Slower for CJK languages to aid comprehension.
      case 'es':
      case 'it':
        return 0.55; // Slightly faster suits these Romance languages.
      default:
        return 0.5;
    }
  }

  /// Speaks raw [ssml] markup for fine-grained pronunciation control.
  Future<void> speakWithSSML(String ssml, Locale locale) async {
    await _tts.setLanguage(locale.toLanguageTag());
    await _tts.speak(ssml);
  }
}
/// Describes one TTS voice: its locale tag, the engine's voice name (if
/// any), and a relative quality score (higher is better).
class TtsVoiceConfig {
  TtsVoiceConfig({
    required this.locale,
    this.name,
    required this.quality,
  });

  final String locale;
  final String? name;
  final int quality;
}
SSML for Localized Voice Responses
/// Fluent builder for SSML documents.
///
/// All plain-text arguments are XML-escaped, so user-provided text (names,
/// search queries, …) cannot break or inject markup into the document.
class SSMLBuilder {
  final StringBuffer _buffer = StringBuffer();
  final String _locale;
  // Whether the closing </speak> tag has already been written.
  bool _closed = false;

  SSMLBuilder(this._locale) {
    _buffer.write('<speak xml:lang="$_locale">');
  }

  /// Escapes the XML special characters in [value].
  static String _escape(String value) => value
      .replaceAll('&', '&amp;')
      .replaceAll('<', '&lt;')
      .replaceAll('>', '&gt;')
      .replaceAll('"', '&quot;');

  /// Appends plain text.
  SSMLBuilder say(String text) {
    _buffer.write(_escape(text));
    return this;
  }

  /// Inserts a silent pause of [duration].
  SSMLBuilder pause(Duration duration) {
    int ms = duration.inMilliseconds;
    _buffer.write('<break time="${ms}ms"/>');
    return this;
  }

  /// Emphasizes [text]; [level] is e.g. 'strong', 'moderate', 'reduced'.
  SSMLBuilder emphasis(String text, {String level = 'moderate'}) {
    _buffer.write('<emphasis level="${_escape(level)}">${_escape(text)}</emphasis>');
    return this;
  }

  /// Reads [text] as the given type.
  /// interpretAs: date, time, telephone, currency, cardinal, ordinal
  SSMLBuilder sayAs(String text, String interpretAs) {
    _buffer.write('<say-as interpret-as="${_escape(interpretAs)}">${_escape(text)}</say-as>');
    return this;
  }

  /// Pronounces [text] using the phonetic spelling [phonetic].
  SSMLBuilder phoneme(String text, String phonetic, {String alphabet = 'ipa'}) {
    _buffer.write(
        '<phoneme alphabet="${_escape(alphabet)}" ph="${_escape(phonetic)}">${_escape(text)}</phoneme>');
    return this;
  }

  /// Wraps [text] in a prosody element adjusting rate, pitch, and volume.
  SSMLBuilder prosody(String text, {double? rate, String? pitch, String? volume}) {
    final attrs = <String>[];
    if (rate != null) attrs.add('rate="${(rate * 100).toInt()}%"');
    if (pitch != null) attrs.add('pitch="${_escape(pitch)}"');
    if (volume != null) attrs.add('volume="${_escape(volume)}"');
    _buffer.write('<prosody ${attrs.join(' ')}>${_escape(text)}</prosody>');
    return this;
  }

  /// Closes the document and returns the SSML string.
  ///
  /// Idempotent: calling build() more than once no longer appends a second
  /// (invalid) closing tag.
  String build() {
    if (!_closed) {
      _buffer.write('</speak>');
      _closed = true;
    }
    return _buffer.toString();
  }
}
// Usage example
/// Builds a localized SSML summary of the user's cart.
///
/// Unrecognized locales fall back to English instead of returning an empty
/// <speak/> document, which a TTS engine would render as silence.
String buildLocalizedResponse(String locale, String userName, int itemCount) {
  final builder = SSMLBuilder(locale);
  if (locale.startsWith('es')) {
    builder
        .say('Hola ')
        .emphasis(userName)
        .pause(Duration(milliseconds: 300))
        .say('Tienes ')
        .sayAs(itemCount.toString(), 'cardinal')
        .say(' artículos en tu carrito.');
  } else {
    // English serves both 'en' locales and the unknown-locale fallback.
    builder
        .say('Hello ')
        .emphasis(userName)
        .pause(Duration(milliseconds: 300))
        .say('You have ')
        .sayAs(itemCount.toString(), 'cardinal')
        .say(' items in your cart.');
  }
  return builder.build();
}
Google Assistant Integration
Actions on Google with Flutter
/// Dispatches Google Assistant requests to registered, locale-aware intent
/// handlers.
class GoogleAssistantHandler {
  final Map<String, LocalizedIntentHandler> _intentHandlers = {};

  /// Registers [handler] as the responder for [intentName].
  void registerIntent(String intentName, LocalizedIntentHandler handler) {
    _intentHandlers[intentName] = handler;
  }

  /// Routes [request] to its intent handler, or a localized fallback.
  Future<AssistantResponse> handleRequest(AssistantRequest request) async {
    // request.user.locale is a full tag such as 'en-US'. Passing it whole
    // to Locale() would make languageCode be 'en-US', so every
    // languageCode-keyed lookup below would miss and silently fall back to
    // English for every user.
    final locale = _parseLocale(request.user.locale);
    final intent = request.intent.name;
    final handler = _intentHandlers[intent];
    if (handler == null) {
      return _buildFallbackResponse(locale);
    }
    return handler.handle(request, locale);
  }

  /// Splits a locale tag ('en-US' or 'en_US') into a [Locale].
  Locale _parseLocale(String tag) {
    final parts = tag.split(RegExp(r'[-_]'));
    return parts.length >= 2 ? Locale(parts[0], parts[1]) : Locale(parts[0]);
  }

  /// Localized "didn't understand" reply that keeps the conversation open.
  AssistantResponse _buildFallbackResponse(Locale locale) {
    final messages = {
      'en': "I'm sorry, I didn't understand that. Could you try again?",
      'es': 'Lo siento, no entendí eso. ¿Podrías intentarlo de nuevo?',
      'de': 'Es tut mir leid, das habe ich nicht verstanden. Könntest du es noch einmal versuchen?',
      'fr': "Je suis désolé, je n'ai pas compris. Pourriez-vous réessayer?",
    };
    return AssistantResponse(
      speech: messages[locale.languageCode] ?? messages['en']!,
      expectUserResponse: true,
    );
  }
}
/// Contract for intent handlers that produce locale-aware responses.
abstract class LocalizedIntentHandler {
/// Handles [request] and returns a response localized for [locale].
Future<AssistantResponse> handle(AssistantRequest request, Locale locale);
}
/// Handles search intents with locale-aware prompts and result summaries.
class SearchIntentHandler implements LocalizedIntentHandler {
  // Prompt when the user gave no query.
  static const Map<String, String> _queryPrompts = {
    'en': 'What would you like to search for?',
    'es': '¿Qué te gustaría buscar?',
    'de': 'Wonach möchtest du suchen?',
    'fr': 'Que souhaitez-vous rechercher?',
  };

  // Reply when the search returned nothing.
  static const Map<String, String> _emptyResultMessages = {
    'en': "Sorry, I couldn't find any results.",
    'es': 'Lo siento, no pude encontrar ningún resultado.',
    'de': 'Entschuldigung, ich konnte keine Ergebnisse finden.',
    'fr': "Désolé, je n'ai trouvé aucun résultat.",
  };

  // Template summarizing the result list.
  static const Map<String, String> _resultTemplates = {
    'en': 'I found {count} results. The top result is {title}.',
    'es': 'Encontré {count} resultados. El mejor resultado es {title}.',
    'de': 'Ich habe {count} Ergebnisse gefunden. Das beste Ergebnis ist {title}.',
    'fr': "J'ai trouvé {count} résultats. Le meilleur résultat est {title}.",
  };

  /// Picks the entry for [locale]'s language, defaulting to English.
  static String _localized(Map<String, String> messages, Locale locale) =>
      messages[locale.languageCode] ?? messages['en']!;

  @override
  Future<AssistantResponse> handle(AssistantRequest request, Locale locale) async {
    final query = request.parameters['query'] as String?;
    if (query == null || query.isEmpty) {
      return _askForQuery(locale);
    }
    final results = await _performSearch(query, locale);
    return _buildSearchResponse(results, locale);
  }

  /// Re-prompts the user for a search query, keeping the mic open.
  AssistantResponse _askForQuery(Locale locale) => AssistantResponse(
        speech: _localized(_queryPrompts, locale),
        expectUserResponse: true,
      );

  Future<List<SearchResult>> _performSearch(String query, Locale locale) async {
    // Search implementation
    return [];
  }

  /// Summarizes [results]; ends the session when there are none.
  AssistantResponse _buildSearchResponse(List<SearchResult> results, Locale locale) {
    if (results.isEmpty) {
      return AssistantResponse(
        speech: _localized(_emptyResultMessages, locale),
        expectUserResponse: false,
      );
    }
    final speech = _localized(_resultTemplates, locale)
        .replaceAll('{count}', results.length.toString())
        .replaceAll('{title}', results.first.title);
    return AssistantResponse(
      speech: speech,
      expectUserResponse: true,
    );
  }
}
Alexa Skills Integration
Building Multilingual Alexa Skills
/// Routes Alexa skill requests and produces locale-appropriate responses.
class AlexaSkillHandler {
  // Slot synonym strings per language. NOTE(review): not consulted at
  // runtime in this handler — presumably mirrors the skill's interaction
  // model for tooling; confirm intended use.
  final Map<String, Map<String, String>> _slotSynonyms = {
    'en': {
      'yes': 'yes|yeah|yep|sure|ok|okay|affirmative',
      'no': 'no|nope|nah|negative|cancel',
    },
    'de': {
      'yes': 'ja|jawohl|klar|sicher|ok|okay',
      'no': 'nein|nö|nicht|abbrechen',
    },
    'fr': {
      'yes': 'oui|ouais|bien sûr|ok|d\'accord',
      'no': 'non|pas|annuler',
    },
  };

  /// Dispatches the incoming Alexa [request] by its request type.
  Future<AlexaResponse> handleRequest(AlexaRequest request) async {
    final locale = request.request.locale;
    final type = request.request.type;
    switch (type) {
      case 'LaunchRequest':
        return _handleLaunch(locale);
      case 'IntentRequest':
        return _handleIntent(request, locale);
      case 'SessionEndedRequest':
        return _handleSessionEnd(locale);
      default:
        return _handleUnknown(locale);
    }
  }

  /// Resolves the message for [locale]: exact tag first (e.g. 'en-GB'),
  /// then any entry sharing the language code (so 'en-AU' still gets
  /// English rather than skipping straight to the default), then en-US.
  String _message(Map<String, String> messages, String locale) {
    final exact = messages[locale];
    if (exact != null) return exact;
    final language = locale.split('-').first;
    for (final entry in messages.entries) {
      if (entry.key.startsWith('$language-')) return entry.value;
    }
    return messages['en-US']!;
  }

  /// Builds a plain-text response for [locale] from [messages].
  AlexaResponse _speak(Map<String, String> messages, String locale,
      {required bool endSession}) {
    return AlexaResponse(
      outputSpeech: AlexaOutputSpeech(
        type: 'PlainText',
        text: _message(messages, locale),
      ),
      shouldEndSession: endSession,
    );
  }

  /// Greets the user when the skill is launched without an intent.
  AlexaResponse _handleLaunch(String locale) {
    final welcomeMessages = {
      'en-US': 'Welcome to the app! What would you like to do?',
      'en-GB': 'Welcome to the app! What would you like to do?',
      'de-DE': 'Willkommen bei der App! Was möchten Sie tun?',
      'fr-FR': 'Bienvenue dans l\'application! Que souhaitez-vous faire?',
      'es-ES': '¡Bienvenido a la aplicación! ¿Qué te gustaría hacer?',
    };
    return _speak(welcomeMessages, locale, endSession: false);
  }

  /// Routes intent requests, including Amazon's built-in intents.
  AlexaResponse _handleIntent(AlexaRequest request, String locale) {
    final intentName = request.request.intent?.name;
    switch (intentName) {
      case 'SearchIntent':
        return _handleSearch(request, locale);
      case 'AMAZON.HelpIntent':
        return _handleHelp(locale);
      case 'AMAZON.StopIntent':
      case 'AMAZON.CancelIntent':
        return _handleStop(locale);
      default:
        return _handleUnknown(locale);
    }
  }

  /// Confirms the search query, or re-prompts when the slot is empty.
  AlexaResponse _handleSearch(AlexaRequest request, String locale) {
    final query = request.request.intent?.slots?['query']?.value;
    // Guard against a missing/empty slot: without this the skill would
    // literally speak "Searching for null".
    if (query == null || query.isEmpty) {
      final prompts = {
        'en-US': 'What would you like to search for?',
        'de-DE': 'Wonach möchtest du suchen?',
        'fr-FR': 'Que souhaitez-vous rechercher?',
        'es-ES': '¿Qué te gustaría buscar?',
      };
      return _speak(prompts, locale, endSession: false);
    }
    final responses = {
      'en-US': 'Searching for $query',
      'de-DE': 'Suche nach $query',
      'fr-FR': 'Recherche de $query',
      'es-ES': 'Buscando $query',
    };
    return _speak(responses, locale, endSession: false);
  }

  /// Explains the skill's capabilities.
  AlexaResponse _handleHelp(String locale) {
    final helpMessages = {
      'en-US': 'You can ask me to search for something, or say stop to exit.',
      'de-DE': 'Du kannst mich bitten, nach etwas zu suchen, oder stopp sagen, um zu beenden.',
      'fr-FR': 'Vous pouvez me demander de rechercher quelque chose, ou dire stop pour quitter.',
      'es-ES': 'Puedes pedirme que busque algo, o decir detener para salir.',
    };
    return _speak(helpMessages, locale, endSession: false);
  }

  /// Says goodbye and ends the session.
  AlexaResponse _handleStop(String locale) {
    final goodbyeMessages = {
      'en-US': 'Goodbye!',
      'de-DE': 'Auf Wiedersehen!',
      'fr-FR': 'Au revoir!',
      'es-ES': '¡Adiós!',
    };
    return _speak(goodbyeMessages, locale, endSession: true);
  }

  /// Alexa ended the session; no speech may be returned.
  AlexaResponse _handleSessionEnd(String locale) {
    return AlexaResponse(shouldEndSession: true);
  }

  /// Localized error reply for unrecognized requests or intents.
  AlexaResponse _handleUnknown(String locale) {
    final errorMessages = {
      'en-US': 'Sorry, I didn\'t understand that. Please try again.',
      'de-DE': 'Entschuldigung, das habe ich nicht verstanden. Bitte versuche es erneut.',
      'fr-FR': 'Désolé, je n\'ai pas compris. Veuillez réessayer.',
      'es-ES': 'Lo siento, no entendí eso. Por favor, inténtalo de nuevo.',
    };
    return _speak(errorMessages, locale, endSession: false);
  }
}
Natural Language Understanding
Intent Classification with Locale Context
/// Lightweight regex-based intent classifier with per-locale patterns.
class NLUEngine {
  // locale -> ordered list of patterns; first match wins.
  final Map<String, List<IntentPattern>> _patterns = {};

  /// Registers the intent [patterns] used for utterances in [locale].
  void loadPatterns(String locale, List<IntentPattern> patterns) {
    _patterns[locale] = patterns;
  }

  /// Classifies [utterance] against the patterns for [locale] (falling back
  /// to English), returning an 'unknown' zero-confidence result when no
  /// pattern matches.
  NLUResult classify(String utterance, String locale) {
    final patterns = _patterns[locale] ?? _patterns['en'] ?? [];
    final normalizedUtterance = _normalize(utterance, locale);
    for (final pattern in patterns) {
      final match = pattern.regex.firstMatch(normalizedUtterance);
      if (match != null) {
        return NLUResult(
          intent: pattern.intent,
          confidence: pattern.confidence,
          entities: _extractEntities(match, pattern.entityGroups),
        );
      }
    }
    return NLUResult(
      intent: 'unknown',
      confidence: 0.0,
      entities: {},
    );
  }

  /// Lowercases [text], removes locale-specific filler words, and collapses
  /// whitespace.
  String _normalize(String text, String locale) {
    String normalized = text.toLowerCase().trim();
    // Remove locale-specific fillers.
    const fillers = {
      'en': ['um', 'uh', 'like', 'you know'],
      'de': ['äh', 'ähm', 'also', 'halt'],
      'fr': ['euh', 'hein', 'genre', 'quoi'],
      'es': ['eh', 'pues', 'bueno', 'este'],
    };
    for (final filler in fillers[locale] ?? const <String>[]) {
      // \b cannot be used here: it only recognizes ASCII word characters,
      // so '\bäh\b' would never match the German fillers 'äh'/'ähm'.
      // Anchor on whitespace/string edges instead, and escape the filler
      // defensively in case it ever contains regex metacharacters.
      normalized = normalized.replaceAll(
        RegExp('(?<=^|\\s)${RegExp.escape(filler)}(?=\\s|\$)'),
        ' ',
      );
    }
    // Collapse the gaps left by removals and trim the edges so patterns
    // anchored at ^ still match after a leading filler is stripped.
    return normalized.replaceAll(RegExp(r'\s+'), ' ').trim();
  }

  /// Maps named entity groups to their captured substrings, skipping
  /// groups that did not participate or captured nothing.
  Map<String, String> _extractEntities(RegExpMatch match, Map<String, int> groups) {
    final entities = <String, String>{};
    for (final entry in groups.entries) {
      final value = match.group(entry.value);
      if (value != null && value.isNotEmpty) {
        entities[entry.key] = value;
      }
    }
    return entities;
  }
}
/// A regex-driven rule mapping an utterance to an intent.
class IntentPattern {
  IntentPattern({
    required this.intent,
    required this.regex,
    this.confidence = 1.0,
    this.entityGroups = const {},
  });

  /// Intent name reported when [regex] matches.
  final String intent;

  /// Pattern tested against the normalized utterance.
  final RegExp regex;

  /// Score reported on a match.
  final double confidence;

  /// Entity name -> capture-group index within [regex].
  final Map<String, int> entityGroups;
}
/// Outcome of classifying a single utterance.
class NLUResult {
  NLUResult({
    required this.intent,
    required this.confidence,
    required this.entities,
  });

  /// Detected intent name ('unknown' when nothing matched).
  final String intent;

  /// Match confidence in [0, 1].
  final double confidence;

  /// Extracted entity name -> captured text.
  final Map<String, String> entities;
}
Testing Voice Localization
Unit Testing Voice Commands
// Unit tests for VoiceCommandHandler: intent detection in several
// languages, parameter extraction, and the English fallback used for
// unknown language codes.
void main() {
group('VoiceCommandHandler', () {
late VoiceCommandHandler handler;
// A fresh handler per test; no state is shared between cases.
setUp(() {
handler = VoiceCommandHandler();
});
test('detects English search intent', () {
expect(handler.detectIntent('search for pizza', 'en'), 'search');
expect(handler.detectIntent('find restaurants', 'en'), 'search');
expect(handler.detectIntent('look up weather', 'en'), 'search');
});
test('detects Spanish search intent', () {
expect(handler.detectIntent('buscar pizza', 'es'), 'search');
expect(handler.detectIntent('encuentra restaurantes', 'es'), 'search');
});
test('detects German navigation intent', () {
expect(handler.detectIntent('gehe zu Einstellungen', 'de'), 'navigate');
expect(handler.detectIntent('öffne Kamera', 'de'), 'navigate');
});
// The extracted parameter is the utterance minus its trigger phrase.
test('extracts parameters correctly', () {
expect(
handler.extractParameter('search for pizza', 'search', 'en'),
'pizza',
);
expect(
handler.extractParameter('buscar restaurantes', 'search', 'es'),
'restaurantes',
);
});
test('handles unknown language gracefully', () {
// Falls back to English
expect(handler.detectIntent('search for pizza', 'xx'), 'search');
});
});
}
Best Practices
Voice Localization Checklist
- Test with native speakers - Automated tests can't catch unnatural phrasing
- Use natural speech patterns - Avoid robotic-sounding translations
- Handle regional variants - en-US vs en-GB have different expectations
- Consider speech rate - Some languages are spoken faster/slower
- Provide audio confirmation - Always confirm critical actions
- Support multiple trigger phrases - Users express intents differently
Common Mistakes to Avoid
- Literal translation of voice prompts
- Ignoring regional accents in speech recognition
- Using the same speech rate for all languages
- Not providing locale-specific help commands
- Forgetting to handle voice-specific error states
Conclusion
Voice assistant localization requires thinking beyond text translation. Consider speech patterns, natural phrasing, and regional variations to create voice experiences that feel native to users in every language. Test with native speakers and iterate based on real usage data.
Related Resources
- Flutter Localization Analytics - Track voice command usage
- Flutter Accessibility Localization - Voice for accessibility
- Free ARB Editor - Manage voice response translations