/*************************************************************************/
/* */
/* Centre for Speech Technology Research */
/* University of Edinburgh, UK */
/* Copyright (c) 1994,1995,1996 */
/* All Rights Reserved. */
/* */
/* Permission is hereby granted, free of charge, to use and distribute */
/* this software and its documentation without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of this work, and to */
/* permit persons to whom this work is furnished to do so, subject to */
/* the following conditions: */
/* 1. The code must retain the above copyright notice, this list of */
/* conditions and the following disclaimer. */
/* 2. Any modifications must be clearly marked as such. */
/* 3. Original authors' names are not deleted. */
/* 4. The authors' names are not used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
/* THIS SOFTWARE. */
/* */
/*************************************************************************/
/* Author : Paul Taylor */
/* Date : June 1994 */
/*-----------------------------------------------------------------------*/
/* EST_Track file manipulation program */
/* */
/*=======================================================================*/

#include "EST.h"
#include "EST_cmd_line_options.h"

#define DEFAULT_TIME_SCALE 0.001

int StrListtoIList(EST_StrList &s, EST_IList &il);
void extract_channel(EST_Track &orig, EST_Track &nt, EST_IList &ch_list);

EST_write_status save_snns_pat(const EST_String filename,
                               EST_TrackList &inpat, EST_TrackList &outpat);

EST_read_status read_TrackList(EST_TrackList &tlist, EST_StrList &files,
                               EST_Option &al);

void extract(EST_Track &tr, EST_Option &al);

/** @name ch_track Track file manipulation
 * @id ch-track-manual
 * @toc
 */

//@{

/**@name Synopsis
 */
//@{

//@synopsis

/**
ch_track is used to manipulate the format of a track file. Operations include:

  - file format conversion
  - smoothing
  - changing the frame spacing of a track (resampling)
  - producing differentiated and delta tracks
  - using a threshold to convert a track file to a label file
  - making multiple input files into a single multi-channel output file
  - extracting a single channel from a multi-channel track
  - extracting a time-delimited portion of the waveform
*/

//@}

/**@name Options
 */
//@{

//@options

//@}

/*
 * Program entry point.
 *
 * Parses the command line, loads every input file into a track list,
 * combines the tracks (in parallel with -pc, otherwise by concatenation),
 * applies the requested processing steps in a fixed order (-S, -sm,
 * -diff / -delta, -c, -start/-end/-from/-to, -track_names) and finally
 * writes either a label file (-style label) or a track file.
 *
 * Returns 0 on success; every failure path calls exit() with a non-zero
 * status. -info and snns output terminate the program early via exit().
 */
int main(int argc, char *argv[])
{
    EST_String out_file("-");
    EST_Option al, settings;
    EST_StrList files;
    EST_Track tr;
    EST_TrackList trlist;
    EST_Litem *p;

    // NOTE(review): every option that takes an argument carries a type
    // tag (<int>, <float>, <string>, <ifile>) after its name. The usage
    // parser is understood to treat untagged options as boolean flags,
    // which would break the ival()/fval()/val() reads below -- confirm
    // the accepted tag names against the parse_command_line documentation.
    parse_command_line(
        argc, argv,
        EST_String("[input file] -o [output file] [options]\n")+
        "Summary: change/copy track files\n"
        "use \"-\" to make input and output files stdin/out\n"
        "-h Options help\n"+
        options_track_input()+ "\n"+
        options_track_output()+ "\n"
        "-info Print information about file and header. \n"
        " This option gives useful information such as file \n"
        " length, file type, channel names. No output is produced\n\n"
        "-track_names <ifile> \n"
        " File containing new names for output channels\n\n"
        "-diff Differentiate contour. This performs simple \n"
        " numerical differentiation on the contour by \n"
        " subtracting the amplitude of the current frame \n"
        " from the amplitude of the next. Although quick, \n"
        " this technique is crude and not recommende as the \n"
        " estimation of the derivate is done on only one point\n\n"
        "-delta <int> Make delta coefficients (better form of differentiate).\n"
        " The argument to this option is the regression length of \n"
        " of the delta calculation and can be between 2 and 4 \n\n"
        "-sm <float> Length of smoothing window in seconds. Various types of \n"
        " smoothing are available for tracks. This options specifies \n"
        " length of the smooting window which effects the degree of \n"
        " smoothing, i.e. a longer value means more smoothing \n\n"
        "-smtype <string> Smooth type, median or mean\n"
        "-style <string> Convert track to other form. Currently only one form \n"
        " \"label\" is supported. This uses a specified cut off to \n"
        " make a label file, with two labels, one for above the \n"
        " cut off (-pos) and one for below (-neg)\n\n"
        "-t <float> threshold for track to label conversion \n"
        "-neg <string> Name of negative label in track to label conversion \n"
        "-pos <string> Name of positive label in track to label conversion \n"
        "-pc <string> Combine given tracks in parallel. If option \n"
        " is longest, pad shorter tracks to longest, else if \n"
        " first pad/cut to match first input track \n" +
        options_track_filetypes_long(),
        files, al);

    /* redundant options
        "-time_channel \n"+
        " Which track in track file holds pitchmark times\n"+
        "-time_scale \n"+
        " Scale of pitchmarks (default 0.001 = milliseconds)\n"+
    */

    override_lib_ops(settings, al);
    out_file = al.present("-o") ? al.val("-o") : (EST_String)"-";

    // Refuse to continue without input before attempting to read anything.
    if (files.length() == 0)
    {
        cerr << argv[0] << ": no input files specified\n";
        exit(-1);
    }

    // A failed read is an error, so it must not report a success status.
    if (read_TrackList(trlist, files, al) != read_ok)
        exit(-1);

    if (al.present("-info"))
    {
        for (p = trlist.head(); p; p = p->next())
            track_info(trlist(p));
        exit(0);
    }

    if (al.present("-pc"))      // parallelize them
        ParallelTracks(tr, trlist, al.val("-pc"));
    else if (al.val("-otype", 0) == "snns")
    {
        // sometime this will generalise for multiple input files
        // The first two loaded tracks are used, so both must exist.
        if (trlist.length() < 2)
        {
            cerr << argv[0]
                 << ": snns output requires at least two input files\n";
            exit(-1);
        }

        EST_TrackList inpat, outpat;
        inpat.append(trlist.nth(0));
        outpat.append(trlist.nth(1));
        if (save_snns_pat(out_file, inpat, outpat) != write_ok)
            exit(-1);
        exit(0);
    }
    else                        // concatenate them
    {
        tr.resize(0, tr.num_channels());
        // Reorg -- fix += to resize to largest num_channels (with warning)
        for (p = trlist.head(); p; p = p->next())
            tr += trlist(p);
    }

    if (al.present("-S"))
        tr.sample(al.fval("-S"));

    if (al.present("-sm"))
    {
        track_smooth(tr, al.fval("-sm"), al.val("-smtype"));
    }

    if (al.present("-diff") && al.present("-delta"))
    {
        cerr << "Using -diff and -delta together makes no sense !\n";
        exit(-1);
    }

    if (al.present("-diff"))
    {
        tr = differentiate(tr);
    }

    if (al.present("-delta"))
    {
        EST_Track ntr = tr;     // to copy size !;
        delta(tr, ntr, al.ival("-delta"));
        tr = ntr;
    }

    if (al.present("-c"))
    {
        EST_StrList s;
        EST_Track ntr;
        EST_IList il;
        StringtoStrList(al.val("-c"), s, " ,"); // separator can be space or comma
        StrListtoIList(s, il);
        extract_channel(tr, ntr, il);
        tr = ntr;
    }

    if (al.present("-start") || al.present("-end")
        || al.present("-to") || al.present("-from"))
        extract(tr, al);

    // tr.assign_map(&LPCTrackMap);
    // tr.set_space_type("VARI");

    // optionally rename output tracks before saving
    if (al.present("-track_names"))
    {
        EST_StrList new_names;
        if (load_StrList(al.val("-track_names"), new_names) != format_ok)
        {
            cerr << "Failed to load new track names file." << endl;
            exit(-1);
        }
        /*
        if (tr.num_channels() != new_names.length())
        {
            cerr << "Number of names in output track names file (";
            cerr << new_names.length() << ") " << endl;
            cerr << " does not match number of output channels (";
            cerr << tr.num_channels() << ")" << endl;
            exit(-1);
        }

        EST_Litem *np;
        int ni;
        for (np = new_names.head(),ni=0; np; np = np->next(),ni++)
            tr.set_channel_name(new_names(np),ni);
        */
        tr.resize(EST_CURRENT, new_names);
    }

    // Write out file in appropriate format
    if (al.val("-style", 0) == "label")
    {
        EST_Relation lab;
        if (al.present("-t"))
            track_to_label(tr, lab, al.fval("-t"));
        else
            track_to_label(tr, lab);

        if (al.present("-pos"))
            change_label(lab, "pos", al.val("-pos"));
        if (al.present("-neg"))
            change_label(lab, "neg", al.val("-neg"));

        if (lab.save(out_file) != write_ok)
            exit(-1);
    }
    /*
    else if (al.val("-style",0) == "pm")
    {
        EST_Relation lab;
        if (!al.present("-f"))
        {
            cerr << "must specify sample rate (with -f) for pm style\n";
            exit(-1);
        }
        int sample_rate = al.ival("-f", 0);
        track_to_pm(tr, sample_rate, lab);
        if (lab.save(out_file) != write_ok)
            exit(-1);
    }
    */
    else
    {
        if (tr.save(out_file, al.val("-otype")) != write_ok)
            exit(-1);
    }

    return 0;
}

/*
 * Copy command-line values from `al` into the settings list `a_list`
 * under their library option names.
 *
 * "time_scale" is first set to DEFAULT_TIME_SCALE and then replaced by
 * -time_scale when that option is present. "lab_file_type" is only set
 * when -style is "label", and "time_channel" only when -time_channel is
 * present.
 */
void override_lib_ops(EST_Option &a_list, EST_Option &al)
{
    a_list.override_val("ishift", al.val("-s", 0));
    a_list.override_val("color", al.val("-color", 0));
    a_list.override_val("in_track_file_type", al.val("-itype", 0));
    a_list.override_val("out_track_file_type", al.val("-otype", 0));
    a_list.override_val("tr_to_label_thresh", al.val("-t", 0));
    a_list.override_fval("time_scale", DEFAULT_TIME_SCALE);

    if (al.val("-style", 0) == "label")
        a_list.override_val("lab_file_type", al.val("-otype", 0));
    if (al.present("-time_scale"))
        a_list.override_fval("time_scale", al.fval("-time_scale", 1));
    if (al.present("-time_channel"))
        a_list.override_val("time_channel", al.sval("-time_channel", 1));
}

/** @name Making multiple tracks into a single track

If multiple input files are specified, by default they are concatenated
into the output file.

    $ ch_track kdt_010.tr kdt_011.tr kdt_012.tr kdt_013.tr -o out.tr

In the above example, 4 multi-channel input files are concatenated into a
single output file. Multi-channel tracks can be concatenated provided they
all have the same number of input channels.

Multiple input files can be made into a multi-channel output file by
using the -pc option:

    $ ch_track kdt_010.tr kdt_011.tr kdt_012.tr kdt_013.tr -pc longest -o out.tr

The argument to -pc can either be longest, in which the output track is
the length of the longest input file, or first in which it is the length
of the first input file.
*/

//@{
//@}

/** @name Extracting channels from multi-channel tracks

The -c option is used to specify channels which should be extracted from
the input. If the input is a 4 channel track,

    $ ch_track kdt_m.tr -o a.tr -c "0 2"

will extract the 0th and 2nd channel (counting starts from 0). The
argument to -c can be either a single number or a list of numbers
(wrapped in quotes).
*/

//@{
//@}

/** @name Extracting of a single region from a track

There are several ways of extracting a region of a track. The simplest
way is by using the start, end, to and from commands to delimit a sub
portion of the input track. For example

    $ ch_track kdt_010.tr -o small.tr -start 1.45 -end 1.768

extracts a subtrack starting at 1.45 seconds and extending to 1.768
seconds. Alternatively,

    $ ch_track kdt_010.tr -o small.tr -from 50 -to 100

extracts a subtrack starting at 50 frames and extending to 100 frames.
Times and frames can be mixed in sub-track extraction. The output track
will have the same number of channels as the input track.
*/

//@{
//@}

/** @name Adding headers and format conversion

It is usually a good idea for all track files to have headers as this way
different files can be handled safely. ch_track provides a means of
adding headers to unheadered files. These files are assumed to be ascii
floats with one channel per line. The following adds a header to an ascii
file.

    $ ch_track kdt_010.atr -o kdt_010.h5.tr -otype est -s 0.01

ch_track can change the frame shift of a fixed frame file, or convert a
variable frame shift file into a fixed frame shift. At present this is
done with a very crude resampling technique and hence the output file may
suffer from aliasing distortion.

Change to a frame spacing of 0.02 seconds:

    $ ch_track kdt_010.tr -o kdt_010.tr2 -S 0.02
*/

//@{
//@}

//@}