FreeTTSWordToSound xref

View Javadoc

1   /*
2    * JCaptcha, the open source java framework for captcha definition and integration
3    * Copyright (c)  2007 jcaptcha.net. All Rights Reserved.
4    * See the LICENSE.txt file distributed with this package.
5    */
6   
7   package com.octo.captcha.component.sound.wordtosound;
8   
9   import java.io.ByteArrayInputStream;
10  import java.io.IOException;
11  import java.io.InputStream;
12  import java.io.SequenceInputStream;
13  import java.util.Locale;
14  import java.util.Vector;
15  
16  import javax.sound.sampled.AudioFormat;
17  import javax.sound.sampled.AudioInputStream;
18  
19  import com.octo.captcha.CaptchaException;
20  import com.octo.captcha.component.sound.soundconfigurator.SoundConfigurator;
21  import com.sun.speech.freetts.Voice;
22  import com.sun.speech.freetts.VoiceManager;
23  import com.sun.speech.freetts.audio.AudioPlayer;
24  import com.sun.speech.freetts.util.Utilities;
25  
26  /***
27   * WordToSound implementation with FreeTTS an openSource Text To Speech implementation.
28   *
29   * @author Benoit
30   * @version 1.0
31   */
32  public class FreeTTSWordToSound extends AbstractWordToSound implements WordToSound {
33      public static String defaultVoiceName = "kevin16";
34  
35      public static String defaultVoicePackage = "com.sun.speech.freetts.en.us.cmu_time_awb.AlanVoiceDirectory,com.sun.speech.freetts.en.us.cmu_us_kal.KevinVoiceDirectory";
36  
37      //private static String FREETTS_PROPERTIES_KEY = "freetts.voices";
38  
39  
40      private Voice voice = null;
41  
42      /***
43       * default Voice, allocated at instanciation
44       */
45      private Voice defaultVoice = null;
46  
47      private VoiceManager voiceManager = null;
48  
49  
50      /***
51       * Constructor for a FreeTTS implmentation of WordToSound. This constructor imply that WordToSound only use one
52       * voice define by voiceName, with its own locale
53  
54       * @param minAcceptedWordLength Length Minimal of generated words
55       * @param maxAcceptedWordLength Length Maximal of generated words
56       */
57      public FreeTTSWordToSound(SoundConfigurator configurator, int minAcceptedWordLength,
58                                int maxAcceptedWordLength) {
59          super(configurator, minAcceptedWordLength, maxAcceptedWordLength);
60          //Voices use by freeTTS, we define where they are, currently in the java en_us.jar
61          //add the package
62          //addToSystemesPropetites(this.configurator.getLocation());
63  
64          // The VoiceManager manages all the voices for FreeTTS.
65          voiceManager = VoiceManager.getInstance();
66  
67          this.defaultVoice = voiceManager.getVoice(this.configurator.getName());
68          if (this.defaultVoice == null) {
69              throw new RuntimeException("unable to load voice :'" + this.configurator.getName() + "'");
70          }
71  
72          configureVoice(this.defaultVoice);
73      }
74  
75      /***
76       * @see com.octo.captcha.component.sound.wordtosound.WordToSound#getSound(java.lang.String)
77       */
78      public AudioInputStream getSound(String word) throws CaptchaException {
79          //return a sound generated with the default voice.
80          voice = defaultVoice;      
81          
82          return addEffects(stringToSound(word));
83      }
84  
85      /***
86       * @see WordToSound#getSound(String, java.util.Locale)
87       */
88      public AudioInputStream getSound(String word, Locale locale) throws CaptchaException {
89          return getSound(word);
90          /*TODO  uncomment this when freetts supports new languages
91          Voice[] voices = voiceManager.getVoices();
92          Voice selectedVoice = null;
93  
94          //if the default voice is corresponding
95          if (defaultVoice.getLocale().equals(locale))
96          {
97              voice = defaultVoice;
98          }
99          else
100         {
101             //try to find a voice corresponding to the locale
102             for (int i = 0; i < voices.length; i++)
103             {
104                 if (voices[i].getLocale().equals(locale))
105                 {
106                     selectedVoice = voices[i];
107                 }
108             }
109 
110             if (selectedVoice != null)
111             {
112                 selectedVoice.allocate();
113                 voice = selectedVoice;
114                 configureVoice(voice);
115             }
116             else
117             {
118                 throw new CaptchaException("No voice corresponding to the Locale");
119             }
120         }
121 
122         return addEffects(stringToSound(word)); */
123     }
124 
125     /***
126      * @see com.octo.captcha.component.sound.wordtosound.AbstractFreeTTSWordToSound#addEffects(javax.sound.sampled.AudioInputStream)
127      */
128     protected AudioInputStream addEffects(AudioInputStream sound) {
129         return sound;
130     }
131 
132 
133 
134     /***
135      * Configue the voice with the SoundConfigurator
136      */
137     private void configureVoice(Voice voice) {
138         voice.setPitch(configurator.getPitch());
139         voice.setVolume(configurator.getVolume());
140         voice.setRate(configurator.getRate());
141     }
142 
143     /***
144      * Main method for this service Return an image with the specified. Synchronisation is very important, for multi
145      * threading execution
146      *
147      * @param sentence Written sentece to transform into speech
148      *
149      * @return the generated sound
150      *
151      * @throws com.octo.captcha.CaptchaException
152      *          if word is invalid or an exception occurs during the sound generation
153      */
154     public synchronized AudioInputStream stringToSound(String sentence) throws CaptchaException {
155         //use the custom (see inner class) InputStreamAudioPlayer, which provide interface to
156         // Audio Stream
157         InputStreamAudioPlayer audioPlayer = new InputStreamAudioPlayer();
158 
159         this.voice.setAudioPlayer(audioPlayer);
160 
161         // Allocates the resources for the voice.
162         this.voice.allocate();
163         
164         // Synthesize speech.
165         this.voice.speak(sentence);
166 
167         this.voice.deallocate();
168         
169         AudioInputStream ais = audioPlayer.getAudioInputStream();
170         return ais;
171     }
172 
173     /***
174      * Implementation of freeTTS AudioPlayer interface, to produce an audioInputStream, this is not a very clean way
175      * since it doesn't really play. But it is the only way to get a stream easily
176      */
177     private class InputStreamAudioPlayer implements AudioPlayer {
178         private boolean debug = false;
179 
180         private AudioFormat currentFormat = null;
181 
182         private byte[] outputData;
183 
184         private int curIndex = 0;
185 
186         private int totBytes = 0;
187 
188         private Vector outputList;
189 
190         private AudioInputStream audioInputStream;
191 
192         /***
193          * Constructs a InputStreamAudioPlayer
194          *
195          */
196         public InputStreamAudioPlayer() {
197             debug = Utilities.getBoolean("com.sun.speech.freetts.audio.AudioPlayer.debug");
198             outputList = new Vector();
199         }
200 
201         /***
202          * Sets the audio format for this player
203          *
204          * @param format the audio format
205          *
206          * @throws UnsupportedOperationException if the line cannot be opened with the given format
207          */
208         public synchronized void setAudioFormat(AudioFormat format) {
209             currentFormat = format;
210         }
211 
212         /***
213          * Gets the audio format for this player
214          *
215          * @return format the audio format
216          */
217         public AudioFormat getAudioFormat() {
218             return currentFormat;
219         }
220 
221         /***
222          * Pauses audio output
223          */
224         public void pause() {
225         }
226 
227         /***
228          * Resumes audio output
229          */
230         public synchronized void resume() {
231         }
232 
233         /***
234          * Cancels currently playing audio
235          */
236         public synchronized void cancel() {
237         }
238 
239         /***
240          * Prepares for another batch of output. Larger groups of output (such as all output associated with a single
241          * FreeTTSSpeakable) should be grouped between a reset/drain pair.
242          */
243         public synchronized void reset() {
244         }
245 
246         /***
247          * Starts the first sample timer
248          */
249         public void startFirstSampleTimer() {
250         }
251 
252         /***
253          * Closes this audio player
254          */
255         public synchronized void close() {
256             try {
257                 audioInputStream.close();
258             } catch (IOException ioe) {
259                 System.err.println("Problem while closing the audioInputSteam");
260             }
261 
262         }
263 
264         public AudioInputStream getAudioInputStream() {
265             InputStream tInputStream = new SequenceInputStream(outputList.elements());
266             AudioInputStream tAudioInputStream = new AudioInputStream(tInputStream, currentFormat,
267                     totBytes / currentFormat.getFrameSize());
268 
269             return tAudioInputStream;
270         }
271 
272         /***
273          * Returns the current volume.
274          *
275          * @return the current volume (between 0 and 1)
276          */
277         public float getVolume() {
278             return 1.0f;
279         }
280 
281         /***
282          * Sets the current volume.
283          *
284          * @param volume the current volume (between 0 and 1)
285          */
286         public void setVolume(float volume) {
287         }
288 
289         /***
290          * Starts the output of a set of data. Audio data for a single utterance should be grouped between begin/end
291          * pairs.
292          *
293          * @param size the size of data between now and the end
294          */
295         public void begin(int size) {
296             outputData = new byte[size];
297             curIndex = 0;
298         }
299 
300         /***
301          * Marks the end of a set of data. Audio data for a single utterance should be groupd between begin/end pairs.
302          *
303          * @return true if the audio was output properly, false if the output was cancelled or interrupted.
304          */
305         public boolean end() {
306             outputList.add(new ByteArrayInputStream(outputData));
307             totBytes += outputData.length;
308             return true;
309         }
310 
311         /***
312          * Waits for all queued audio to be played
313          *
314          * @return true if the audio played to completion, false if the audio was stopped
315          */
316         public boolean drain() {
317             return true;
318         }
319 
320         /***
321          * Gets the amount of played since the last mark
322          *
323          * @return the amount of audio in milliseconds
324          */
325         public synchronized long getTime() {
326             return -1L;
327         }
328 
329         /***
330          * Resets the audio clock
331          */
332         public synchronized void resetTime() {
333         }
334 
335         /***
336          * Writes the given bytes to the audio stream
337          *
338          * @param audioData audio data to write to the device
339          *
340          * @return <code>true</code> of the write completed successfully, <code> false </code> if the write was
341          *         cancelled.
342          */
343         public boolean write(byte[] audioData) {
344             return write(audioData, 0, audioData.length);
345         }
346 
347         /***
348          * Writes the given bytes to the audio stream
349          *
350          * @param bytes  audio data to write to the device
351          * @param offset the offset into the buffer
352          * @param size   the size into the buffer
353          *
354          * @return <code>true</code> of the write completed successfully, <code> false </code> if the write was
355          *         cancelled.
356          */
357         public boolean write(byte[] bytes, int offset, int size) {
358             System.arraycopy(bytes, offset, outputData, curIndex, size);
359             curIndex += size;
360             return true;
361         }
362 
363         /***
364          * Returns the name of this audioplayer
365          *
366          * @return the name of the audio player
367          */
368         public String toString() {
369             return "AudioInputStreamAudioPlayer";
370         }
371 
372         /***
373          * Outputs a debug message if debugging is turned on
374          *
375          * @param msg the message to output
376          */
377         private void debugPrint(String msg) {
378             if (debug) {
379                 System.out.println(toString() + ": " + msg);
380             }
381         }
382 
383         /***
384          * Shows metrics for this audio player
385          */
386         public void showMetrics() {
387         }
388     }
389 
390 }
391