|
#include "AbbrStra.h" |
|
#include <runn.h> |
|
#include <vector> |
|
#include <fstream> |
|
#include <iostream> |
|
|
|
|
|
WordData::WordData(const char *wrdnam, const char *stpnam, |
|
const char *lfsnam) : |
|
wrdset(wrdnam), stp(stpnam), lfs(lfsnam) |
|
{ |
|
wrdset.set_path_name("Ab3P"); |
|
wrdset.gopen_ctable_map(); |
|
stp.set_path_name("Ab3P"); |
|
stp.gopen_htable_map(); |
|
lfs.set_path_name("Ab3P"); |
|
lfs.gopen_htable_map(); |
|
} |
|
|
|
WordData::~WordData() |
|
{ |
|
wrdset.gclose_ctable_map(); |
|
stp.gclose_htable_map(); |
|
lfs.gclose_htable_map(); |
|
} |
|
|
|
|
|
AbbrStra::AbbrStra() |
|
{ |
|
npairs = tpairs = nsfs = nmatchs = amatchs = 0; |
|
} |
|
|
|
|
|
AbbrStra::~AbbrStra() |
|
{ |
|
} |
|
|
|
|
|
void AbbrStra::token(const char *str, char lst[1000][1000]) |
|
{ |
|
long i,j=0,k=0; |
|
long n=strlen(str)-1; |
|
|
|
while(isblank(str[n])) n--; |
|
|
|
while(str[j]){ |
|
while(isblank(str[j]))j++; |
|
i=j; |
|
while((str[j])&&(!isblank(str[j])))j++; |
|
strncpy(lst[k],str+i,j-i); |
|
lst[k][j-i]='\0'; |
|
if(str[j]){ |
|
k++; |
|
j++; |
|
} |
|
} |
|
if((j-1)>n) k--; |
|
ntk=k+1; |
|
} |
|
|
|
|
|
long AbbrStra::tokenize(const char *str, char lst[1000][1000]) |
|
{ |
|
long i,j=0,k=0; |
|
long n=strlen(str)-1; |
|
|
|
while(isblank(str[n])) n--; |
|
|
|
while(str[j]){ |
|
while(isblank(str[j]))j++; |
|
i=j; |
|
while((str[j])&&(!isblank(str[j])))j++; |
|
strncpy(lst[k],str+i,j-i); |
|
lst[k][j-i]='\0'; |
|
if(str[j]){ |
|
k++; |
|
j++; |
|
} |
|
} |
|
if((j-1)>n) k--; |
|
return k+1; |
|
} |
|
|
|
|
|
long AbbrStra::num_token(const char *str) |
|
{ |
|
long i,j=0,k=0; |
|
long n=strlen(str)-1; |
|
|
|
while(isblank(str[n])) n--; |
|
|
|
while(str[j]){ |
|
while(isblank(str[j]))j++; |
|
i=j; |
|
while((str[j])&&(!isblank(str[j])))j++; |
|
if(str[j]){ |
|
k++; |
|
j++; |
|
} |
|
} |
|
if((j-1)>n) k--; |
|
return k+1; |
|
} |
|
|
|
|
|
|
|
long AbbrStra::first_ch(const char *str, char *fch, long num) |
|
{ |
|
long i, j, numtk; |
|
char tk[1000][1000]; |
|
|
|
numtk = tokenize(str,tk); |
|
if(num>numtk) return 0; |
|
|
|
for(i=0; i<num; i++) |
|
fch[i] = tk[numtk-num+i][0]; |
|
|
|
return 1; |
|
} |
|
|
|
long AbbrStra::is_upperal(const char *str) |
|
{ |
|
for(long i=strlen(str)-1; i>=0; i--) |
|
if(!isupper(str[i]) || !isalpha(str[i])) |
|
return 0; |
|
return 1; |
|
} |
|
|
|
long AbbrStra::is_alpha(const char *str) |
|
{ |
|
for(long i=strlen(str)-1; i>=0; i--) |
|
if(!isalpha(str[i])) |
|
return 0; |
|
return 1; |
|
} |
|
|
|
|
|
|
|
void AbbrStra::str_tolower(const char *str1, char *str2) |
|
{ |
|
long i=0; |
|
|
|
while(str1[i]) { |
|
str2[i] = tolower(str1[i]); |
|
i++; |
|
} |
|
str2[i] = '\0'; |
|
} |
|
|
|
|
|
long AbbrStra::get_str(const char *str1, char *str2, long num) |
|
{ |
|
char ch, tk[1000][1000]; |
|
long i, j, numtk; |
|
|
|
if(num<0) { cout<<"num<0\n"; exit(1); } |
|
numtk = tokenize(str1,tk); |
|
if(numtk<num) return 0; |
|
|
|
strcpy(str2,tk[numtk-num]); |
|
for(i=1; i<num; i++) { |
|
strcat(str2," "); |
|
strcat(str2,tk[numtk-num+i]); |
|
} |
|
|
|
return 1; |
|
} |
|
|
|
bool AbbrStra::isupper_str(const char *str) |
|
{ |
|
long i, len=strlen(str); |
|
|
|
for(i=0; i<len; i++) |
|
if(isalpha(str[i]) && !isupper(str[i])) |
|
return false; |
|
|
|
return true; |
|
} |
|
|
|
bool AbbrStra::is_onealpha(const char *str) |
|
{ |
|
long i, j=0, len=strlen(str); |
|
|
|
for(i=0; i<len; i++) |
|
if(isalpha(str[i])) j++; |
|
|
|
if(j==1) return true; |
|
else return false; |
|
} |
|
|
|
long AbbrStra::count_upperstr(const char *str) |
|
{ |
|
long i, j, k, numtk; |
|
char tk[1000][1000]; |
|
|
|
numtk = tokenize(str,tk); |
|
|
|
j = 0; |
|
for(i=numtk-1; i>=0; i--) { |
|
if(isupper(tk[i][0])) j++; |
|
else return j; |
|
} |
|
|
|
return j; |
|
} |
|
|
|
void AbbrStra::get_alpha(const char *str1, char *str2) |
|
{ |
|
long i = 0, j = 0; |
|
long len = strlen(str1); |
|
|
|
while(i<len) { |
|
if(isalpha(str1[i])) { |
|
str2[j] = str1[i]; |
|
j++; |
|
} |
|
i++; |
|
} |
|
str2[j] = '\0'; |
|
} |
|
|
|
|
|
bool AbbrStra::lf_ok(const char *shrtf, const char *longf) |
|
{ |
|
long i; |
|
long paren=0, sbrac=0; |
|
string s, l; |
|
|
|
|
|
for(i=strlen(longf)-1; i>=0; i--) { |
|
if(longf[i]=='(') paren++; |
|
if(longf[i]==')') paren--; |
|
if(longf[i]=='[') sbrac++; |
|
if(longf[i]==']') sbrac--; |
|
} |
|
if(paren!=0 || sbrac!=0) return false; |
|
|
|
s.assign(shrtf); |
|
l.assign(longf); |
|
|
|
for(i=0; i<s.length(); i++) s[i]=tolower(s[i]); |
|
for(i=0; i<l.length(); i++) l[i]=tolower(l[i]); |
|
|
|
|
|
if( (" "+l+" ").find(" "+s+" ")!=string::npos ) return false; |
|
|
|
return true; |
|
} |
|
|
|
|
|
|
|
long AbbrStra::search_backward(long sloc, long tinx, long tloc, const char *abbr, bool first) |
|
{ |
|
long sfloc=sloc, tkinx=tinx, tkloc=tloc; |
|
|
|
while(sfloc>=0) { |
|
loop1: while((tkloc>=0)&&(tok[tkinx][tkloc]!=abbr[sfloc])) tkloc--; |
|
if(tkloc<0) { |
|
tkinx--; |
|
if(tkinx<0) return 0; |
|
tkloc=strlen(tok[tkinx])-1; |
|
} |
|
else { |
|
if(sfloc==0) { |
|
if(tkloc!=0) { |
|
if(!first) { tkloc--; goto loop1; } |
|
else if(isalnum(tok[tkinx][tkloc-1])) { tkloc--; goto loop1; } |
|
} |
|
} |
|
mod[sfloc][0]=tkinx; |
|
mod[sfloc][1]=tkloc; |
|
sfloc--; tkloc--; |
|
} |
|
} |
|
|
|
return 1; |
|
} |
|
|
|
long AbbrStra::search_backward_adv(const char *abbr, bool flag) |
|
{ |
|
long i; |
|
long lna=strlen(abbr); |
|
|
|
i=0; |
|
while(i<lna){ |
|
if(search_backward(i,mod[i][0],mod[i][1]-1,abbr,flag)) return 1; |
|
i++; |
|
} |
|
return 0; |
|
} |
|
|
|
void AbbrStra::extract_lf(long begin, long end) |
|
{ |
|
strcpy(lf,tok[begin]); |
|
for(long i=begin+1; i<=end; i++) { |
|
strcat(lf," "); |
|
strcat(lf,tok[i]); |
|
} |
|
} |
|
|
|
|
|
void AbbrStra::extract_lf(long begin, long end, const char *str) |
|
{ |
|
token(str,tok); |
|
strcpy(lf,tok[begin]); |
|
for(long i=begin+1; i<=end; i++) { |
|
strcat(lf," "); |
|
strcat(lf,tok[i]); |
|
} |
|
} |
|
|
|
|
|
bool AbbrStra::exist_skipword(long nsf) |
|
{ |
|
long i=0, j=0, k; |
|
|
|
while(i<nsf) { |
|
if(i==(nsf-1)) k=ntk-mod[i][0]-1; |
|
else k=mod[i+1][0]-mod[i][0]-1; |
|
if(k>0) j+=k; |
|
i++; |
|
} |
|
|
|
if(j>0) return true; |
|
else return false; |
|
} |
|
|
|
|
|
bool AbbrStra::exist_n_skipwords(long nsf, long n) |
|
{ |
|
long i=0, j, k; |
|
bool flag=false; |
|
|
|
|
|
while(i<nsf) { |
|
if(i==(nsf-1)) k=ntk-mod[i][0]-1; |
|
else k=mod[i+1][0]-mod[i][0]-1; |
|
if(k>n) return false; |
|
if(k==n) flag=true; |
|
i++; |
|
} |
|
|
|
if(flag) return true; |
|
else return false; |
|
} |
|
|
|
|
|
bool AbbrStra::exist_n_stopwords(long nsf, long n) |
|
{ |
|
long i=0, j, k; |
|
bool flag=false; |
|
|
|
while(i<nsf) { |
|
if(i==(nsf-1)) k=ntk-mod[i][0]-1; |
|
else k=mod[i+1][0]-mod[i][0]-1; |
|
if(k>n) return false; |
|
if(k==n) flag=true; |
|
if(k>0) { |
|
while(k) { |
|
if(!wData->stp.find(tok[mod[i][0]+k])) return false; |
|
k--; |
|
} |
|
} |
|
i++; |
|
} |
|
|
|
if(flag) return true; |
|
else return false; |
|
} |
|
|
|
|
|
bool AbbrStra::stopword_ok(long nsf, long nsw) |
|
{ |
|
long i=0, j, k; |
|
|
|
while(i<nsf) { |
|
if(i==(nsf-1)) k=ntk-mod[i][0]-1; |
|
else k=mod[i+1][0]-mod[i][0]-1; |
|
if(k>nsw) return false; |
|
if(k>0) { |
|
while(k) { |
|
if(!wData->stp.find(tok[mod[i][0]+k])) return false; |
|
k--; |
|
} |
|
} |
|
i++; |
|
} |
|
|
|
return true; |
|
} |
|
|
|
bool AbbrStra::skip_stop_ok(long nsf, long nsw, long n) |
|
{ |
|
long i=0, j, k, nstp; |
|
|
|
while(i<nsf) { |
|
if(i==(nsf-1)) k=ntk-mod[i][0]-1; |
|
else k=mod[i+1][0]-mod[i][0]-1; |
|
if(k>nsw) return false; |
|
|
|
if(k>(nsw-n)) { |
|
nstp=0; |
|
while(k) { |
|
if(wData->stp.find(tok[mod[i][0]+k])) nstp++; |
|
k--; |
|
} |
|
if(nstp<n) return false; |
|
} |
|
i++; |
|
} |
|
|
|
return true; |
|
} |
|
|
|
|
|
bool AbbrStra::skip_stop_ok2(long nsf, long nsw, long n) |
|
{ |
|
long i=0, j, k, nstp; |
|
|
|
while(i<nsf) { |
|
if(i==(nsf-1)) k=ntk-mod[i][0]-1; |
|
else k=mod[i+1][0]-mod[i][0]-1; |
|
if((k>0)&&(k!=nsw)) return false; |
|
if(k>0) { |
|
nstp=0; |
|
while(k) { |
|
if(wData->stp.find(tok[mod[i][0]+k])) nstp++; |
|
k--; |
|
} |
|
if(nstp<n) return false; |
|
} |
|
|
|
i++; |
|
} |
|
|
|
return true; |
|
} |
|
|
|
|
|
bool AbbrStra::skipword_ok(long nsf, long nsw) |
|
{ |
|
long i=0, j, k; |
|
|
|
while(i<nsf) { |
|
if(i==(nsf-1)) k=ntk-mod[i][0]-1; |
|
else k=mod[i+1][0]-mod[i][0]-1; |
|
if(k>nsw) return false; |
|
i++; |
|
} |
|
|
|
return true; |
|
} |
|
|
|
|
|
bool AbbrStra::is_subword(long nsf) |
|
{ |
|
long i=0; |
|
char word[1000]; |
|
|
|
while(i<nsf) { |
|
if(mod[i][1]!=0) { |
|
strcpy(word,tok[mod[i][0]]+mod[i][1]); |
|
if(wData->wrdset.count(word)==0) return false; |
|
} |
|
i++; |
|
} |
|
|
|
return true; |
|
} |
|
|
|
|
|
bool AbbrStra::is_BeginWrdMatch(long nsf, bool general) |
|
{ |
|
long i=0, j; |
|
bool *bwm = new bool [ntk]; |
|
|
|
for(j=0; j<ntk; j++) bwm[j] = false; |
|
|
|
while(i<nsf) { |
|
if(mod[i][1]==0) |
|
bwm[mod[i][0]] = true; |
|
else if( general && (!isalnum(tok[mod[i][0]][mod[i][1]-1])) ) |
|
bwm[mod[i][0]] = true; |
|
i++; |
|
} |
|
|
|
for(j=0; j<nsf; j++) |
|
if(!bwm[mod[j][0]]) { |
|
delete [] bwm; |
|
return false; |
|
} |
|
|
|
delete [] bwm; |
|
|
|
return true; |
|
} |
|
|
|
|
|
bool AbbrStra::is_WithinWrdMatch(long nsf, bool general) |
|
{ |
|
long i=0, wwm=0; |
|
|
|
while(i<nsf) { |
|
if(!general) { |
|
if(mod[i][1]>0) wwm++; |
|
} |
|
else { |
|
if(mod[i][1]>0 && isalnum(tok[mod[i][0]][mod[i][1]-1])) wwm++; |
|
} |
|
i++; |
|
} |
|
|
|
if(wwm>0) return true; |
|
else return false; |
|
} |
|
|
|
|
|
bool AbbrStra::is_FirstLetMatch(long nsf, bool general) |
|
{ |
|
long i=0, flm=0, flm2=0; |
|
|
|
while(i<nsf) { |
|
if(mod[i][1]==0) flm++; |
|
else if( general && (!isalnum(tok[mod[i][0]][mod[i][1]-1])) ) { |
|
flm++; flm2++; |
|
} |
|
i++; |
|
} |
|
|
|
if(flm==nsf) return true; |
|
else return false; |
|
} |
|
|
|
|
|
bool AbbrStra::is_FirstLetMatch2(long nsf, bool general) |
|
{ |
|
long i=0, flm=0, flm2=0; |
|
|
|
while(i<nsf) { |
|
if(mod[i][1]==0) flm++; |
|
else if( general && (!isalnum(tok[mod[i][0]][mod[i][1]-1])) ) { |
|
flm++; flm2++; |
|
} |
|
i++; |
|
} |
|
|
|
if( (flm==nsf) && (flm2>=1) ) return true; |
|
else return false; |
|
} |
|
|
|
|
|
bool AbbrStra::is_FirstLetSMatch(const char *abbr, bool general) |
|
{ |
|
long i=0, j=strlen(abbr)-1, flm=0, lsm=0; |
|
|
|
while(i<j) { |
|
if(mod[i][1]==0) flm++; |
|
else if( general && (!isalnum(tok[mod[i][0]][mod[i][1]-1])) ) flm++; |
|
i++; |
|
} |
|
|
|
if( (tok[mod[j][0]][mod[j][1]]=='s') && |
|
(mod[j][1]==(strlen(tok[mod[j][0]])-1)) && |
|
mod[j][0]==mod[j-1][0] ) lsm++; |
|
|
|
if((flm==j) && (lsm==1)) return true; |
|
else return false; |
|
} |
|
|
|
|
|
bool AbbrStra::is_ContLetMatch(long nsf) |
|
{ |
|
long i=0, cl=1; |
|
|
|
while(i<(nsf-1)) { |
|
if( mod[i][0]==mod[i+1][0] && |
|
(mod[i][1]+1)==mod[i+1][1] ) cl++; |
|
i++; |
|
} |
|
|
|
if(cl>=2) return true; |
|
else return false; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
bool AbbrStra::set_condition(const char *str1) |
|
{ |
|
int n=0, m=0, o=0; |
|
|
|
switch(setCondition) { |
|
case 1: |
|
for(long i=strlen(str1)-1; i>=0; i--) |
|
if(!isalpha(str1[i])) |
|
return false; |
|
return true; |
|
break; |
|
case 2: |
|
if(!isalnum(str1[0])) return false; |
|
for(long i=strlen(str1)-1; i>=0; i--) { |
|
if(isalpha(str1[i])) n++; |
|
else m++; |
|
} |
|
if( (n>0) && (m>0) ) return true; |
|
else return false; |
|
break; |
|
case 3: |
|
for(long i=strlen(str1)-1; i>=0; i--) { |
|
if(!isalnum(str1[i])) return false; |
|
if(isalpha(str1[i])) n++; |
|
if(isdigit(str1[i])) m++; |
|
} |
|
if( (n>0) && (m>0) ) return true; |
|
else return false; |
|
break; |
|
case 4: |
|
if(!isalpha(str1[0])) return false; |
|
for(long i=strlen(str1)-1; i>=0; i--) { |
|
if(isdigit(str1[i])) return false; |
|
if(!isalnum(str1[i])) n++; |
|
} |
|
if(n>0) return true; |
|
else return false; |
|
break; |
|
case 5: |
|
if(!isalnum(str1[0])) return false; |
|
for(long i=strlen(str1)-1; i>0; i--) { |
|
if(!isalnum(str1[i])) return true; |
|
} |
|
return false; |
|
break; |
|
case 6: |
|
if(!isalnum(str1[0])) return false; |
|
for(long i=strlen(str1)-1; i>=0; i--) { |
|
if(isalpha(str1[i])) n++; |
|
if(isdigit(str1[i])) m++; |
|
if(!isalnum(str1[i])) o++; |
|
} |
|
if( (n>0) && (m>0) && (o>0) ) return true; |
|
else return false; |
|
break; |
|
case 7: |
|
if(!isalnum(str1[0])) return false; |
|
for(long i=strlen(str1)-1; i>=0; i--) |
|
if(isalpha(str1[i])) return true; |
|
return false; |
|
break; |
|
default: |
|
cout << "Not defined set condition\n"; |
|
exit(1); |
|
} |
|
} |
|
|
|
|
|
|
|
|
|
bool FirstLetOneChSF::set_condition(const char *shrtf, const char *longf, char *str) |
|
{ |
|
long i=0, len=strlen(shrtf), numtk; |
|
char tk[1000][1000]; |
|
|
|
|
|
while(i<len && isalpha(shrtf[i])) i++; |
|
if(i!=len) return false; |
|
|
|
|
|
numtk = tokenize(longf,tk); |
|
if(len>numtk) return false; |
|
|
|
for(i=0; i<len; i++) |
|
str[i] = tk[numtk-len+i][0]; |
|
str[i] = '\0'; |
|
|
|
if(!is_alpha(str)) return false; |
|
|
|
return true; |
|
} |
|
|
|
|
|
long FirstLetOneChSF::strategy(const char *sf_, const char *str_) { |
|
long lna,lnt,flag; |
|
bool genFL=false; |
|
char phr[10000], phrl[10000]; |
|
|
|
str_tolower(sf_,sf); |
|
str_tolower(str_,text); |
|
|
|
get_str(str_,phr,1); |
|
str_tolower(phr,phrl); |
|
|
|
if(is_onealpha(phr)) return 0; |
|
if(isupper_str(phr)) return 0; |
|
if(wData->stp.find(phrl)) return 0; |
|
if(!wData->lfs.find(phrl)) return 0; |
|
|
|
token(text,tok); |
|
lna = strlen(sf); |
|
lnt = strlen(tok[ntk-1]); |
|
|
|
flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); |
|
if(!flag) return 0; |
|
|
|
do { |
|
if(!skipword_ok(lna,0)) continue; |
|
if(!is_FirstLetMatch(lna,genFL)) continue; |
|
|
|
extract_lf(mod[0][0],ntk-1,str_); |
|
return 1; |
|
} while(search_backward_adv(sf,genFL)); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
bool FirstLet::set_condition(const char *shrtf, const char *longf, char *str) |
|
{ |
|
long i=0, len=strlen(shrtf), numtk; |
|
char tk[1000][1000]; |
|
|
|
|
|
while(i<len && isalpha(shrtf[i])) i++; |
|
if(i!=len) return false; |
|
|
|
|
|
numtk = tokenize(longf,tk); |
|
if(len>numtk) return false; |
|
|
|
for(i=0; i<len; i++) |
|
str[i] = tk[numtk-len+i][0]; |
|
str[i] = '\0'; |
|
|
|
if(!is_alpha(str)) return false; |
|
|
|
return true; |
|
} |
|
|
|
|
|
long FirstLet::strategy(const char *sf_, const char *str_) { |
|
long lna,lnt,flag; |
|
bool genFL=false; |
|
|
|
str_tolower(sf_,sf); |
|
str_tolower(str_,text); |
|
|
|
token(text,tok); |
|
lna = strlen(sf); |
|
lnt = strlen(tok[ntk-1]); |
|
|
|
flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); |
|
if(!flag) return 0; |
|
|
|
do { |
|
if(!skipword_ok(lna,0)) continue; |
|
if(!is_FirstLetMatch(lna,genFL)) continue; |
|
|
|
extract_lf(mod[0][0],ntk-1,str_); |
|
return 1; |
|
} while(search_backward_adv(sf,genFL)); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
long FirstLetGen::strategy(const char *sf_, const char *str_) |
|
{ |
|
long lna,lnt,flag; |
|
bool genFL=true; |
|
|
|
str_tolower(sf_,sf); |
|
str_tolower(str_,text); |
|
|
|
token(text,tok); |
|
lna = strlen(sf); |
|
lnt = strlen(tok[ntk-1]); |
|
|
|
flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); |
|
if(!flag) return 0; |
|
|
|
do { |
|
if(!skipword_ok(lna,0)) continue; |
|
if(!is_FirstLetMatch2(lna,genFL)) continue; |
|
|
|
extract_lf(mod[0][0],ntk-1,str_); |
|
return 1; |
|
} while(search_backward_adv(sf,genFL)); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
long FirstLetGen2::strategy(const char *sf_, const char *str_) |
|
{ |
|
long lna,lnt,flag; |
|
bool genFL=true; |
|
|
|
str_tolower(sf_,sf); |
|
str_tolower(str_,text); |
|
|
|
token(text,tok); |
|
lna = strlen(sf); |
|
lnt = strlen(tok[ntk-1]); |
|
|
|
flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); |
|
if(!flag) return 0; |
|
|
|
do { |
|
if(!skipword_ok(lna,0)) continue; |
|
if(!is_FirstLetMatch(lna,genFL)) continue; |
|
|
|
extract_lf(mod[0][0],ntk-1,str_); |
|
return 1; |
|
} while(search_backward_adv(sf,genFL)); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
bool FirstLetGenS::set_condition(const char *str) |
|
{ |
|
if(str[strlen(str)-1]!='s') return false; |
|
|
|
for(long i=strlen(str)-2; i>=0; i--) { |
|
if(!isupper(str[i])) return false; |
|
if(!isalpha(str[i])) return false; |
|
} |
|
|
|
return true; |
|
} |
|
|
|
|
|
long FirstLetGenS::strategy(const char *sf_, const char *str_) |
|
{ |
|
long lna,lnt,flag; |
|
bool genFL=true; |
|
|
|
if(!set_condition(sf_)) return 0; |
|
|
|
str_tolower(sf_,sf); |
|
str_tolower(str_,text); |
|
|
|
token(text,tok); |
|
lna = strlen(sf); |
|
lnt = strlen(tok[ntk-1]); |
|
|
|
flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); |
|
if(!flag) return 0; |
|
|
|
do { |
|
if(!skipword_ok(lna,0)) continue; |
|
if(!is_FirstLetSMatch(sf,genFL)) continue; |
|
|
|
extract_lf(mod[0][0],ntk-1,str_); |
|
return 1; |
|
} while(search_backward_adv(sf,genFL)); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
long FirstLetGenStp::strategy(const char *sf_, const char *str_) |
|
{ |
|
long lna,lnt,flag; |
|
bool genFL=true; |
|
|
|
str_tolower(sf_,sf); |
|
str_tolower(str_,text); |
|
|
|
token(text,tok); |
|
lna = strlen(sf); |
|
lnt = strlen(tok[ntk-1]); |
|
|
|
flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); |
|
if(!flag) return 0; |
|
|
|
do { |
|
if(!exist_skipword(lna)) continue; |
|
if(!stopword_ok(lna,1)) continue; |
|
if(!is_FirstLetMatch(lna,genFL)) continue; |
|
|
|
extract_lf(mod[0][0],ntk-1,str_); |
|
return 1; |
|
} while(search_backward_adv(sf,genFL)); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
long FirstLetGenStp2::strategy(const char *sf_, const char *str_) |
|
{ |
|
long lna,lnt,flag; |
|
bool genFL=true; |
|
|
|
str_tolower(sf_,sf); |
|
str_tolower(str_,text); |
|
|
|
token(text,tok); |
|
lna = strlen(sf); |
|
lnt = strlen(tok[ntk-1]); |
|
|
|
flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); |
|
if(!flag) return 0; |
|
|
|
do { |
|
if(!exist_n_stopwords(lna,2)) continue; |
|
if(!is_FirstLetMatch(lna,genFL)) continue; |
|
|
|
extract_lf(mod[0][0],ntk-1,str_); |
|
return 1; |
|
} while(search_backward_adv(sf,genFL)); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
long FirstLetGenSkp::strategy(const char *sf_, const char *str_) |
|
{ |
|
long lna,lnt,flag; |
|
bool genFL=true; |
|
|
|
str_tolower(sf_,sf); |
|
str_tolower(str_,text); |
|
|
|
token(text,tok); |
|
lna = strlen(sf); |
|
lnt = strlen(tok[ntk-1]); |
|
|
|
flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); |
|
if(!flag) return 0; |
|
|
|
do { |
|
if(!exist_skipword(lna)) continue; |
|
if(!skipword_ok(lna,1)) continue; |
|
if(!is_FirstLetMatch(lna,genFL)) continue; |
|
|
|
extract_lf(mod[0][0],ntk-1,str_); |
|
return 1; |
|
} while(search_backward_adv(sf,genFL)); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
long WithinWrdWrd::strategy(const char *sf_, const char *str_) |
|
{ |
|
long lna,lnt,flag; |
|
bool genFL=true; |
|
|
|
str_tolower(sf_,sf); |
|
str_tolower(str_,text); |
|
|
|
token(text,tok); |
|
lna = strlen(sf); |
|
lnt = strlen(tok[ntk-1]); |
|
|
|
flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); |
|
if(!flag) return 0; |
|
|
|
do { |
|
if(!skipword_ok(lna,0)) continue; |
|
if(!is_subword(lna)) continue; |
|
if(!is_WithinWrdMatch(lna,genFL)) continue; |
|
|
|
extract_lf(mod[0][0],ntk-1,str_); |
|
return 1; |
|
} while(search_backward_adv(sf,genFL)); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
long WithinWrdFWrd::strategy(const char *sf_, const char *str_) |
|
{ |
|
long lna,lnt,flag; |
|
bool genFL=true; |
|
|
|
str_tolower(sf_,sf); |
|
str_tolower(str_,text); |
|
|
|
token(text,tok); |
|
lna = strlen(sf); |
|
lnt = strlen(tok[ntk-1]); |
|
|
|
flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); |
|
if(!flag) return 0; |
|
|
|
do { |
|
if(!skipword_ok(lna,0)) continue; |
|
if(!is_subword(lna)) continue; |
|
if(!is_BeginWrdMatch(lna,genFL)) continue; |
|
if(!is_WithinWrdMatch(lna,genFL)) continue; |
|
|
|
extract_lf(mod[0][0],ntk-1,str_); |
|
return 1; |
|
} while(search_backward_adv(sf,genFL)); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
long WithinWrdFWrdSkp::strategy(const char *sf_, const char *str_) |
|
{ |
|
long lna,lnt,flag; |
|
bool genFL=true; |
|
|
|
str_tolower(sf_,sf); |
|
str_tolower(str_,text); |
|
|
|
token(text,tok); |
|
lna = strlen(sf); |
|
lnt = strlen(tok[ntk-1]); |
|
|
|
flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); |
|
if(!flag) return 0; |
|
|
|
do { |
|
if(!exist_skipword(lna)) continue; |
|
if(!skipword_ok(lna,1)) continue; |
|
if(!is_subword(lna)) continue; |
|
if(!is_BeginWrdMatch(lna,genFL)) continue; |
|
if(!is_WithinWrdMatch(lna,genFL)) continue; |
|
|
|
extract_lf(mod[0][0],ntk-1,str_); |
|
return 1; |
|
} while(search_backward_adv(sf,genFL)); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
long WithinWrdLet::strategy(const char *sf_, const char *str_) |
|
{ |
|
long lna,lnt,flag; |
|
bool genFL=true; |
|
|
|
str_tolower(sf_,sf); |
|
str_tolower(str_,text); |
|
|
|
token(text,tok); |
|
lna = strlen(sf); |
|
lnt = strlen(tok[ntk-1]); |
|
|
|
flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); |
|
if(!flag) return 0; |
|
|
|
do { |
|
if(!skipword_ok(lna,0)) continue; |
|
if(!is_WithinWrdMatch(lna,genFL)) continue; |
|
|
|
extract_lf(mod[0][0],ntk-1,str_); |
|
return 1; |
|
} while(search_backward_adv(sf,genFL)); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
long WithinWrdFLet::strategy(const char *sf_, const char *str_) |
|
{ |
|
long lna,lnt,flag; |
|
bool genFL=true; |
|
|
|
str_tolower(sf_,sf); |
|
str_tolower(str_,text); |
|
|
|
token(text,tok); |
|
lna = strlen(sf); |
|
lnt = strlen(tok[ntk-1]); |
|
|
|
flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); |
|
if(!flag) return 0; |
|
|
|
do { |
|
if(!skipword_ok(lna,0)) continue; |
|
if(!is_BeginWrdMatch(lna,genFL)) continue; |
|
if(!is_WithinWrdMatch(lna,genFL)) continue; |
|
|
|
extract_lf(mod[0][0],ntk-1,str_); |
|
return 1; |
|
} while(search_backward_adv(sf,genFL)); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
long WithinWrdFLetSkp::strategy(const char *sf_, const char *str_) |
|
{ |
|
long lna,lnt,flag; |
|
bool genFL=true; |
|
|
|
str_tolower(sf_,sf); |
|
str_tolower(str_,text); |
|
|
|
token(text,tok); |
|
lna = strlen(sf); |
|
lnt = strlen(tok[ntk-1]); |
|
|
|
flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); |
|
if(!flag) return 0; |
|
|
|
do { |
|
if(!exist_skipword(lna)) continue; |
|
if(!skipword_ok(lna,1)) continue; |
|
if(!is_BeginWrdMatch(lna,genFL)) continue; |
|
if(!is_WithinWrdMatch(lna,genFL)) continue; |
|
|
|
extract_lf(mod[0][0],ntk-1,str_); |
|
return 1; |
|
} while(search_backward_adv(sf,genFL)); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
long ContLet::strategy(const char *sf_, const char *str_) |
|
{ |
|
long lna,lnt,flag; |
|
bool genFL=true; |
|
|
|
str_tolower(sf_,sf); |
|
str_tolower(str_,text); |
|
|
|
token(text,tok); |
|
lna = strlen(sf); |
|
lnt = strlen(tok[ntk-1]); |
|
|
|
flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); |
|
if(!flag) return 0; |
|
|
|
do { |
|
if(!skipword_ok(lna,0)) continue; |
|
if(!is_BeginWrdMatch(lna,genFL)) continue; |
|
if(!is_ContLetMatch(lna)) continue; |
|
|
|
extract_lf(mod[0][0],ntk-1,str_); |
|
return 1; |
|
} while(search_backward_adv(sf,genFL)); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
long ContLetSkp::strategy(const char *sf_, const char *str_) |
|
{ |
|
long lna,lnt,flag; |
|
bool genFL=true; |
|
|
|
str_tolower(sf_,sf); |
|
str_tolower(str_,text); |
|
|
|
token(text,tok); |
|
lna = strlen(sf); |
|
lnt = strlen(tok[ntk-1]); |
|
|
|
flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); |
|
if(!flag) return 0; |
|
|
|
do { |
|
if(!exist_skipword(lna)) continue; |
|
if(!skipword_ok(lna,1)) continue; |
|
if(!is_BeginWrdMatch(lna,genFL)) continue; |
|
if(!is_ContLetMatch(lna)) continue; |
|
|
|
extract_lf(mod[0][0],ntk-1,str_); |
|
return 1; |
|
} while(search_backward_adv(sf,genFL)); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
long AnyLet::strategy(const char *sf_, const char *str_) |
|
{ |
|
long lna,lnt,flag; |
|
bool genFL=true; |
|
|
|
str_tolower(sf_,sf); |
|
str_tolower(str_,text); |
|
|
|
token(text,tok); |
|
lna = strlen(sf); |
|
lnt = strlen(tok[ntk-1]); |
|
|
|
flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); |
|
if(!flag) return 0; |
|
|
|
do { |
|
if(!skipword_ok(lna,1)) continue; |
|
|
|
extract_lf(mod[0][0],ntk-1,str_); |
|
return 1; |
|
} while(search_backward_adv(sf,genFL)); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
|
|
|
|
AbbrStra * StratUtil::strat_factory(string name) |
|
{ |
|
if(name=="FirstLetOneChSF") return new FirstLetOneChSF; |
|
else if(name=="FirstLet") return new FirstLet; |
|
else if(name=="FirstLetGen") return new FirstLetGen; |
|
else if(name=="FirstLetGen2") return new FirstLetGen2; |
|
else if(name=="FirstLetGenS") return new FirstLetGenS; |
|
else if(name=="FirstLetGenStp") return new FirstLetGenStp; |
|
else if(name=="FirstLetGenStp2") return new FirstLetGenStp2; |
|
else if(name=="FirstLetGenSkp") return new FirstLetGenSkp; |
|
else if(name=="WithinWrdWrd") return new WithinWrdWrd; |
|
else if(name=="WithinWrdFWrd") return new WithinWrdFWrd; |
|
else if(name=="WithinWrdFWrdSkp") return new WithinWrdFWrdSkp; |
|
else if(name=="WithinWrdLet") return new WithinWrdLet; |
|
else if(name=="WithinWrdFLet") return new WithinWrdFLet; |
|
else if(name=="WithinWrdFLetSkp") return new WithinWrdFLetSkp; |
|
else if(name=="ContLet") return new ContLet; |
|
else if(name=="ContLetSkp") return new ContLetSkp; |
|
else if(name=="AnyLet") return new AnyLet; |
|
else { cout << "Fail strat_factory\n"; exit(1); } |
|
} |
|
|
|
|
|
|
|
|
|
|
|
bool StratUtil::group_sf(const char *sf, string &grp) |
|
{ |
|
long i, j, len=strlen(sf); |
|
long al=0, num=0, nonalnum=0; |
|
long paren=0, sbrac=0; |
|
|
|
grp = ""; |
|
|
|
if(!isalnum(sf[0])) return false; |
|
for(i=0; i<len; i++) { |
|
if(isalpha(sf[i])) al++; |
|
else if(isdigit(sf[i])) num++; |
|
else nonalnum++; |
|
} |
|
if(al<1) return false; |
|
|
|
|
|
for(i=len-1; i>=0; i--) { |
|
if(sf[i]=='(') paren++; |
|
if(sf[i]==')') paren--; |
|
if(sf[i]=='[') sbrac++; |
|
if(sf[i]==']') sbrac--; |
|
} |
|
if(paren!=0 || sbrac!=0) return false; |
|
|
|
if(al==len) grp.assign("Al"); |
|
else if(num>0) grp.assign("Num"); |
|
else if(nonalnum>0) grp.assign("Spec"); |
|
else { cout << "No sf group\n"; exit(1); } |
|
|
|
|
|
len = len>5 ? 5 : len; |
|
|
|
switch(len) { |
|
case 1: |
|
grp.append("1"); |
|
break; |
|
case 2: |
|
grp.append("2"); |
|
break; |
|
case 3: |
|
grp.append("3"); |
|
break; |
|
case 4: |
|
grp.append("4"); |
|
break; |
|
case 5: |
|
grp.append("5"); |
|
break; |
|
default: |
|
cout << "Not defined #-ch SF" << endl; |
|
exit(1); |
|
} |
|
|
|
return true; |
|
} |
|
|
|
|
|
bool StratUtil::group_sf(const char *sf, const char *lf, string &grp) |
|
{ |
|
long i, j, len=strlen(sf); |
|
long al=0, num=0, nonalnum=0; |
|
long paren=0, sbrac=0; |
|
|
|
if(strlen(lf)<len) return false; |
|
if(!isalnum(sf[0])) return false; |
|
for(i=0; i<len; i++) { |
|
if(isalpha(sf[i])) al++; |
|
else if(isdigit(sf[i])) num++; |
|
else nonalnum++; |
|
} |
|
if(al<1) return false; |
|
if(al>10) return false; |
|
if(num_token(sf)>2) return false; |
|
|
|
|
|
for(i=len-1; i>=0; i--) { |
|
if(sf[i]=='(') paren++; |
|
if(sf[i]==')') paren--; |
|
if(sf[i]=='[') sbrac++; |
|
if(sf[i]==']') sbrac--; |
|
} |
|
if(paren!=0 || sbrac!=0) return false; |
|
|
|
if(al==len) grp.assign("Al"); |
|
else if(num>0) grp.assign("Num"); |
|
else if(nonalnum>0) grp.assign("Spec"); |
|
else { cout << "No sf group\n"; exit(1); } |
|
|
|
|
|
len = len>5 ? 5 : len; |
|
|
|
switch(len) { |
|
case 1: |
|
grp.append("1"); |
|
break; |
|
case 2: |
|
grp.append("2"); |
|
break; |
|
case 3: |
|
grp.append("3"); |
|
break; |
|
case 4: |
|
grp.append("4"); |
|
break; |
|
case 5: |
|
grp.append("5"); |
|
break; |
|
default: |
|
cout << "Not defined #-ch SF" << endl; |
|
exit(1); |
|
} |
|
|
|
return true; |
|
} |
|
|
|
|
|
|
|
void StratUtil::remove_nonAlnum(const char *str1, char *str2) |
|
{ |
|
long i=0, j=0; |
|
|
|
while(str1[i]) { |
|
if(isalnum(str1[i])) { |
|
str2[j] = str1[i]; |
|
j++; |
|
} |
|
i++; |
|
} |
|
str2[j] = '\0'; |
|
} |
|
|
|
|
|
vector<string> StratUtil::get_strats(string s) |
|
{ |
|
if(s=="Al1") return Al1; |
|
else if(s=="Al2") return Al2; |
|
else if(s=="Al3") return Al3; |
|
else if(s=="Al4") return Al4; |
|
else if(s=="Al5") return Al5; |
|
else if(s=="Num2") return Num2; |
|
else if(s=="Num3") return Num3; |
|
else if(s=="Num4") return Num4; |
|
else if(s=="Num5") return Num5; |
|
else if(s=="Spec2") return Spec2; |
|
else if(s=="Spec3") return Spec3; |
|
else if(s=="Spec4") return Spec4; |
|
else if(s=="Spec5") return Spec5; |
|
else { cout << "Incorrect name\n"; exit(1); } |
|
} |
|
|
|
|
|
void StratUtil::push_back_strat(string sgp, string strat) |
|
{ |
|
if(sgp=="Al1") Al1.push_back(strat); |
|
else if(sgp=="Al2") Al2.push_back(strat); |
|
else if(sgp=="Al3") Al3.push_back(strat); |
|
else if(sgp=="Al4") Al4.push_back(strat); |
|
else if(sgp=="Al5") Al5.push_back(strat); |
|
else if(sgp=="Num2") Num2.push_back(strat); |
|
else if(sgp=="Num3") Num3.push_back(strat); |
|
else if(sgp=="Num4") Num4.push_back(strat); |
|
else if(sgp=="Num5") Num5.push_back(strat); |
|
else if(sgp=="Spec2") Spec2.push_back(strat); |
|
else if(sgp=="Spec3") Spec3.push_back(strat); |
|
else if(sgp=="Spec4") Spec4.push_back(strat); |
|
else if(sgp=="Spec5") Spec5.push_back(strat); |
|
} |
|
|
|
|
|
long StratUtil::exist_upperal(const char *str) |
|
{ |
|
long i, len=strlen(str); |
|
|
|
for(i=0; i<len; i++) |
|
if(isupper(str[i])) |
|
return 1; |
|
return 0; |
|
} |
|
|
|
long StratUtil::num_token(const char *str) |
|
{ |
|
long i,j=0,k=0; |
|
long n=strlen(str)-1; |
|
|
|
while(isblank(str[n])) n--; |
|
|
|
while(str[j]){ |
|
while(isblank(str[j]))j++; |
|
i=j; |
|
while((str[j])&&(!isblank(str[j])))j++; |
|
if(str[j]){ |
|
k++; |
|
j++; |
|
} |
|
} |
|
if((j-1)>n) k--; |
|
return k+1; |
|
} |
|
|
|
|