/*************************************************************************/ /* */ /* Centre for Speech Technology Research */ /* University of Edinburgh, UK */ /* Copyright (c) 1997 */ /* All Rights Reserved. */ /* */ /* Permission is hereby granted, free of charge, to use and distribute */ /* this software and its documentation without restriction, including */ /* without limitation the rights to use, copy, modify, merge, publish, */ /* distribute, sublicense, and/or sell copies of this work, and to */ /* permit persons to whom this work is furnished to do so, subject to */ /* the following conditions: */ /* 1. The code must retain the above copyright notice, this list of */ /* conditions and the following disclaimer. */ /* 2. Any modifications must be clearly marked as such. */ /* 3. Original authors' names are not deleted. */ /* 4. The authors' names are not used to endorse or promote products */ /* derived from this software without specific prior written */ /* permission. */ /* */ /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */ /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */ /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */ /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ /* THIS SOFTWARE. */ /* */ /*************************************************************************/ /* Authors : Alan W Black (awb@cstr.ed.ac.uk) */ /* Date : January, February 1997 */ /* -------------------------------------------------------------------- */ /* */ /* A non-GNU implementation of a EST_String class to use with non-G++ */ /* compilers. */ /* */ /* Note this is not a full implementation of libg++'s EST_String class */ /* just the bits we need */ /* */ /*************************************************************************/ #include #include #include #include #include "EST_String.h" // #include "EST_error.h" #include "string_version.h" #include "EST_math.h" extern "C" { #include "regexp.h" } const char *EST_String::version = "CSTR String Class " STRING_VERSION " " STRING_DATE; const EST_String EST_String::Empty(""); EST_String EST_String_nullString = ""; struct subst { EST_String::EST_string_size start, end; char *s; int slen; } ; #if !__GSUB_REENTRANT__ static struct subst *substitutions=NULL; int num_substitutions=0; #endif /********************************************************************\ * * * Locate is the basic utility method behind many of the * * manipulations, it finds something in a EST_String, returns a * * success or failure flag and sets start and end to where it was. * * * \********************************************************************/ int EST_String::locate(const char *s, int len, int from, int &start, int &end) const { CHECK_STRING_ARG(s); const char *sub=NULL; if (!s) return 0; if (from < 0 && -from < size) { int endpos=size+from+1; int p=0; const char *nextsub; while ((nextsub=strstr(str()+p, s))) { p=nextsub-str()+1; if (p > endpos) break; sub=nextsub; } } else if (from>=0 && from <= size) sub= strstr(str()+from, s); if (sub != NULL) { start = sub-str(); end = start + len; return 1; } else { return 0; } } int EST_String::locate(EST_Regex &ex, int from, int &start, int &end, int *starts, int *ends) const { int match_start, match_end; if (from < 0 && -from < size) { int endpos=size+from+1; int p=0; int found=0; while (ex.run(str(), p, match_start, match_end, starts, ends)) { found++; start=match_start; end=match_end; p = match_start+1; if (p > endpos) break; } return found >0; } else if (from >=0 && from <= size) { if (ex.run(str(), from, match_start, match_end, starts, ends)) { start = match_start; end=match_end; return 1; } else return 0; } else return 0; } int EST_String::extract(const char *s, int len, int pos, int &start, int &end) const { CHECK_STRING_ARG(s); if (!s) return 0; if (pos < 0) return locate(s, len, 0, start, end); if (pos <= size-len && memcmp(str()+pos, s, len)==0) { start = pos; end = pos + len; return 1; } else return 0; } int EST_String::extract(EST_Regex &ex, int pos, int &start, int &end) const { int match_start, match_end; if (pos < 0) return locate(ex, 0, start, end); if (pos < size && ex.run(str(), pos, match_start, match_end) && match_start == pos) { start = match_start; end = match_end; return 1; } else return 0; } EST_String EST_String::chop_internal(int from, int len, EST_chop_direction mode) const { int start, end; if (from < 0) { start = size+from; } else { start = from; } end=start+len; if (start >=0 && end <=size && size > 0) switch (mode) { case Chop_Before: return EST_String(str(), size, 0, start); break; case Chop_At: return EST_String(str(), size, start, end-start); break; case Chop_After: return EST_String(str(), size, end, -1); } return EST_String(); } EST_String EST_String::chop_internal(const char *it, int len, int from, EST_chop_direction mode) const { CHECK_STRING_ARG(it); int start, end; if (it && locate(it, len, from, start, end)) switch (mode) { case Chop_Before: return EST_String(str(), size, 0, start); break; case Chop_At: return EST_String(str(), size, start, end-start); break; case Chop_After: return EST_String(str(), size, end, -1); } return EST_String(); } EST_String EST_String::chop_internal (EST_Regex &it, int from, EST_chop_direction mode) const { int start=0, end=0; if (locate(it, from, start, end)) switch (mode) { case Chop_Before: return EST_String(str(), size, 0, start); break; case Chop_At: return EST_String(str(), size, start, end-start); break; case Chop_After: return EST_String(str(), size, end, -1); } return EST_String(); } int EST_String::gsub_internal (const char *os, int olength, const char *s, int length) { CHECK_STRING_ARG(os); CHECK_STRING_ARG(s); int pos=0, n=0, change=0; EST_ChunkPtr new_memory; const char *from; char *to; #if __GSUB_REENTRANT__ struct subst { EST_String::EST_string_size start, end; } *substitutions=NULL; int num_substitutions=0; #endif if (s && os && size > 0 && *os != '\0') { { int start, end; while (locate(os, olength, pos, start, end)) { if (num_substitutions <= n) substitutions = wrealloc(substitutions, struct subst, (num_substitutions +=10)); substitutions[n].start = start; substitutions[n].end = end; change += length - (end-start); n++; pos=end; } } // dubious dealings with the inside of the string from = (const char *)memory; if (change > 0) { // Spurious braces make temporary ref to chunk go away {new_memory = chunk_allocate(size+change+1);} to = new_memory; } else { cp_make_updatable(memory, size); to = memory; } int i, at=0; char *p=to; for(i=0; i 0) memory = new_memory; size += change; } // cout << "gsub n=" << memory.count() << "\n"; #if __GSUB_REENTRANT__ if (substitutions) wfree(substitutions); #endif return n; } int EST_String::gsub_internal (EST_Regex &ex, const char *s, int length) { int bracket_num=-1; if (s==NULL) bracket_num = length; int pos=0, n=0, change=0; EST_ChunkPtr new_memory; const char *from; char *to; #if __GSUB_REENTRANT__ struct subst *substitutions=NULL; int num_substitutions=0; #endif // printf("match '%s'\n", (const char *)(*this)); if (size > 0) { { int start, starts[EST_Regex_max_subexpressions], ends[EST_Regex_max_subexpressions], mlen; while ((start = search(ex, mlen, pos, starts, ends))>=0) { // printf("match %d-%d, %d-%d, %d-%d\n", start, start+mlen, starts[0], ends[0], starts[1], ends[1]); if (num_substitutions <= n) substitutions = wrealloc(substitutions, struct subst, (num_substitutions +=10)); substitutions[n].start = start; substitutions[n].end = start+mlen; if (s) change += length - mlen; else { int slen = ends[bracket_num]-starts[bracket_num]; change += slen - mlen; substitutions[n].slen = slen; substitutions[n].s = walloc(char, slen); memcpy(substitutions[n].s, (const char *)memory+starts[bracket_num], slen); } n++; pos=start+mlen; } } // dubious dealings with the inside of the string from = (const char *)memory; if (change > 0) { // Spurious braces make temporary ref to chunk go away {new_memory = chunk_allocate(size+change+1);} to = new_memory; } else { cp_make_updatable(memory, size); to = memory; } int i, at=0; char *p=to; for(i=0; i 0) memory = new_memory; size += change; } #if __GSUB_REENTRANT__ if (substitutions) wfree(substitutions); #endif return n; } int EST_String::subst(EST_String source, int (&starts)[EST_Regex_max_subexpressions], int (&ends)[EST_Regex_max_subexpressions]) { int n=0, change=0; EST_ChunkPtr new_memory; const char *from; char *to; #if __GSUB_REENTRANT__ struct subst *substitutions=NULL; int num_substitutions=0; #endif // printf("match '%s'\n", (const char *)(*this)); int i; if (size > 0) { int escaped=0; for(i=0; i= '0' &&memory[i] <= '9') { int snum = memory[i] - '0'; if (ends[snum] >= 0 && starts[snum] >=0) { if (num_substitutions <= n) substitutions = wrealloc(substitutions, struct subst, (num_substitutions +=10)); substitutions[n].start = i-1; substitutions[n].end = i+1; substitutions[n].s = ((char *)(void *)(const char *)source.memory) + starts[snum]; substitutions[n].slen = ends[snum] - starts[snum]; change += substitutions[n].slen - 2; n++; } } escaped=0; } else if (memory[i] == '\\') escaped=1; } // dubious dealings with the inside of the string from = (const char *)memory; if (change > 0) { // Spurious braces make temporary ref to chunk go away {new_memory = chunk_allocate(size+change+1);} to = new_memory; } else { cp_make_updatable(memory, size); to = memory; } int at=0; char *p=to; for(i=0; i 0) memory = new_memory; size += change; } #if __GSUB_REENTRANT__ if (substitutions) wfree(substitutions); #endif return n; } // Pass in the two possible separators as pointers so we don't have to // duplicate all the code. Inline definitions of the friend functions // takes care of the pretty interface. int EST_String::split_internal(EST_String result[], int max, const char *s_seperator, int slen, EST_Regex *re_seperator, char quote) const { int n=0; int pos=0; int start, end; int lastspace=0; if (size>0) { while (pos < length()) { start= -1; end= -1; if ((*this)(pos) == quote) { start=pos; pos++; while (pos < length()) { if ((*this)(pos) == quote) { pos++; if ((*this)(pos) != quote) break; else pos++; } else pos++; } end=pos; } else { int mstart, mend, matched; if (s_seperator) matched = locate(s_seperator, slen, pos, mstart, mend); else matched = locate(*re_seperator, pos, mstart, mend); if (matched) if (mstart != pos) { start=pos; end=mstart; pos=mend; lastspace=mend; } else if (pos ==lastspace) { start=pos; end=pos; pos=mend; lastspace=mend; } else { pos=mend; lastspace=mend; } else { start=pos; end=length(); pos=end; } } if (start>=0) result[n++] = EST_String(*this, start, end-start); if (n==max) break; } } return n; } int EST_String::matches(const char *s, int pos) const { CHECK_STRING_ARG(s); int start, end; if (!s) return 0; int len=safe_strlen(s); if (extract(s, len, pos, start, end)) return start==pos && end==len; else return 0; } int EST_String::matches(const EST_String &s, int pos) const { int start, end; if (extract(s.str(), s.size, pos, start, end)) return start==pos && end==s.size; else return 0; } int EST_String::matches(EST_Regex &e, int pos, int *starts, int *ends) const { if (size==0) return e.run_match("", pos, starts, ends) >0; else return e.run_match(str(), pos, starts, ends) >0; } EST_String operator + (const EST_String &a, const char *b) { CHECK_STRING_ARG(b); int al = a.size; int bl = safe_strlen(b); if (al == 0) return EST_String(b, 0, bl); if (bl == 0) return EST_String(a); EST_ChunkPtr c = chunk_allocate(al+bl+1, a.str(), al); if (bl>0) memmove((char *)c + al, b, bl); c(al+bl)='\0'; return EST_String(al+bl, c); } EST_String operator + (const EST_String &a, const EST_String &b) { int al = a.size; int bl = b.size; if (al == 0) return EST_String(b); if (bl == 0) return EST_String(a); EST_ChunkPtr c = chunk_allocate(al+bl+1, a.str(), al); memmove((char *)c+al,b.str(),bl); c(al+bl)='\0'; return EST_String(al+bl, c); } EST_String operator + (const char *a, const EST_String &b) { CHECK_STRING_ARG(a); int al = safe_strlen(a); int bl = b.size; if (bl == 0) return EST_String(a, 0, al); if (al == 0) return EST_String(b); EST_ChunkPtr c = chunk_allocate(al+bl+1, a, al); memmove((char *)c + al, b.str(), bl); c(al+bl)='\0'; return EST_String(al+bl, c); } EST_String operator * (const EST_String &s, int n) { if (n<1) return ""; int l = s.length(); int sz = n * l; EST_String it(NULL, 0, sz); for(int j=0; j0) memmove((char *)memory + size,b.str(),bl); memory(size+bl)='\0'; size += bl; return *this; } EST_String::EST_String(const char *s) { CHECK_STRING_ARG(s); size=safe_strlen(s); if (size != 0) memory = chunk_allocate(size+1, s, size); else memory=NULL; } EST_String::EST_String(const char *s, int start_or_fill, int len) { if (s) { int start= start_or_fill; if (len <0) len=safe_strlen(s)-start; size=len; if (size != 0) memory = chunk_allocate(len+1, s+start, len); else memory=NULL; } else { char fill = start_or_fill; if (len<0) len=0; size=len; if (size != 0) { memory = chunk_allocate(len+1); char *p = memory; for(int j=0; j 0) return (s << str.str()); else return (s << ""); } EST_String EST_String::cat(const EST_String s1, const EST_String s2, const EST_String s3, const EST_String s4, const EST_String s5, const EST_String s6, const EST_String s7, const EST_String s8, const EST_String s9 ) { int len=(s1.length()+s2.length()+s3.length()+s4.length()+s5.length() + s6.length()+s7.length()+s8.length()+s9.length()); EST_String result; result.size=len; result.memory= chunk_allocate(len+1, (const char *)s1, s1.length()); int p = s1.length(); if (s2.length()) { strncpy((char *)result.memory + p, (const char *)s2, s2.length()); p += s2.length(); } if (s3.length()) { strncpy((char *)result.memory + p, (const char *)s3, s3.length()); p += s3.length(); } if (s4.length()) { strncpy((char *)result.memory + p, (const char *)s4, s4.length()); p += s4.length(); } if (s5.length()) { strncpy((char *)result.memory + p, (const char *)s5, s5.length()); p += s5.length(); } if (s6.length()) { strncpy((char *)result.memory + p, (const char *)s6, s6.length()); p += s6.length(); } if (s7.length()) { strncpy((char *)result.memory + p, (const char *)s7, s7.length()); p += s7.length(); } if (s8.length()) { strncpy((char *)result.memory + p, (const char *)s8, s8.length()); p += s8.length(); } if (s9.length()) { strncpy((char *)result.memory + p, (const char *)s9, s9.length()); p += s9.length(); } result.memory(p) = '\0'; return result; } int compare(const EST_String &a, const EST_String &b) { if (a.size == 0 && b.size == 0) return 0; else if (a.size == 0) return -1; else if (b.size == 0) return 1; else return strcmp(a.str(), b.str()); } int compare(const EST_String &a, const char *b) { if (a.size == 0 && (b==NULL || *b == '\0')) return 0; else if (a.size == 0) return -1; else if (b == NULL || *b == '\0') return 1; else return strcmp(a.str(), b); } int fcompare(const EST_String &a, const EST_String &b, const unsigned char *table) { if (a.size == 0 && b.size == 0) return 0; else if (a.size == 0) return -1; else if (b.size == 0) return 1; else return EST_strcasecmp(a.str(), b.str(), table); } int fcompare(const EST_String &a, const char *b, const unsigned char *table) { int bsize = (b ? strlen((const char *)b) : 0); if (a.size == 0 && bsize == 0) return 0; else if (a.size == 0) return -1; else if (bsize == 0) return 1; else return EST_strcasecmp(a.str(), (const char *)b, table); } int operator == (const char *a, const EST_String &b) { CHECK_STRING_ARG(a); if (!a) return 0; else if (b.size==0) return *a == '\0'; else return (*a == b(0)) && strcmp(a, b.str())==0; } int operator == (const EST_String &a, const EST_String &b) { if (a.size==0) return b.size == 0; else if (b.size == 0) return 0; else return a.size == b.size && a(0) == b(0) && memcmp(a.str(),b.str(),a.size)==0; }; EST_String EST_String::Number(int i, int b) { char buf[64]; const char *format; switch (b) { case 8: format="0%o"; break; case 10: format="%d"; break; case 16: format="0x%x"; break; default: format="??%d??"; break; } sprintf(buf, format, i); return EST_String(buf); } EST_String EST_String::Number(long i, int b) { char buf[64]; const char *format; switch (b) { case 8: format="0%lo"; break; case 10: format="%ld"; break; case 16: format="0x%lx"; break; default: format="??%ld??"; break; } sprintf(buf, format, i); return EST_String(buf); } EST_String EST_String::Number(float f) { char buf[64]; sprintf(buf, "%f", f); return EST_String(buf); } EST_String EST_String::Number(double d) { char buf[64]; sprintf(buf, "%f", d); return EST_String(buf); } long EST_String::Long(bool *valid) const { char *end; long val = strtol(str(), &end, 10); if (end==NULL|| *end != '\0') { if (valid != NULL) { *valid=0; return 0L; } else { printf("bad integer number format '%s'\n", (const char *)str()); exit(0); } } if (valid) *valid=1; return val; } int EST_String::Int(bool *valid) const { long val = Long(valid); if (valid && !*valid) return 0L; if (val > INT_MAX || val < INT_MIN) { if (valid != NULL) { *valid=0; return 0L; } else { printf("number out of range for integer %ld", val); exit(0); } } return val; } double EST_String::Double(bool *valid) const { char *end; double val = strtod(str(), &end); if (end==NULL|| *end != '\0') { if (valid != NULL) { *valid=0; return 0.0; } else { printf("bad decimal number format '%s'", (const char *)str()); exit(0); } } if (valid) *valid=1; return val; } float EST_String::Float(bool *valid) const { double val = Double(valid); if (valid && !*valid) return 0.0; if (val > FLT_MAX || val < -FLT_MAX) { if (valid != NULL) { *valid=0; return 0.0; } else { printf("number out of range for float %f", val); exit(0); } } return val; }