#include #include #include #include #include #include #include #include "Btree.h" #include "runn.h" using namespace std; namespace iret { Node::Node(void){ str=NULL; rel=NULL; pdn=NULL; } Node::Node(const char *ptr){ int i=strlen(ptr); str = new char[i+1]; strcpy(str,ptr); rel=NULL; pdn=NULL; } Node::Node(char const *ptr,void *dtr){ int i=strlen(ptr); str = new char[i+1]; strcpy(str,ptr); rel = dtr; pdn=NULL; } Node::~Node(){ if(str)delete [] str; } void Node::debug(void){ cout << "Node {" << endl; cout << " str: " << this->str << endl; if(rel==NULL)cout << " rel: NULL" << endl; else cout << " rel: " << (long)rel << endl; if(pdn==NULL)cout << " pdn: NULL" << endl; else cout << " pdn: " << (long)pdn << endl; cout << " }" << endl; } Page::Page(){ pdn=NULL; ndnm='\0'; } Page::Page(Page *const pz,Page *const pn,const int n){ pdn=pn; int j=(int)(pz->ndnm)-n; ndnm=(char)(j>0 ? j : 0); for(int i=0;i<(int)ndnm;i++){pnd[i]=(pz->pnd)[n+i];} } Page::~Page(){ for(int i=0;i<(int)ndnm;i++){ delete pnd[i]; } } void Page::clean(void){ for(int i=0;i<(int)ndnm;i++){ pnd[i]->str=NULL; } } void Page::insert(const int n,Node * const nd,const int j){ assert(jn;i--)pnd[i]=pnd[i-1]; pnd[n]=nd; } ndnm++; } int Page::search(int &a,int &b,const char *str,int &p){ int j; if((j=stc_my(a,b,str,pnd[0]->str))<0){ p=0; return(0); } else if(j==0){ p=0; return(1); } if((j=stc_my(a,b,str,pnd[(int)(ndnm-1)]->str))>0){ p=(int)ndnm; return(0); } else if(j==0){ p=(int)(ndnm-1); return(1); } int x=0,i; int y=(int)(ndnm-1); while(y-x>1){ i=(y+x)/2; if((j=stc_my(a,b,str,pnd[i]->str))==0){p=i;return(1);} else if(j<0)y=i; else x=i; } p=y; return(0); } int Page::search(int &a,int &b,char *str,int &p,Partial_match *btr){ int j; if((j=btr->stc_my_long(a,b,str,pnd[0]->str,0))<0){ p=0; return(0); } else if(j==0){ p=0; return(1); } if((j=btr->stc_my_long(a,b,str,pnd[(int)(ndnm-1)]->str,(int)(ndnm-1)))>0){ p=(int)ndnm; return(0); } else if(j==0){ p=(int)(ndnm-1); return(1); } int x=0,i; int y=(int)(ndnm-1); while(y-x>1){ i=(y+x)/2; if((j=btr->stc_my_long(a,b,str,pnd[i]->str,i))==0){p=i;return(1);} else if(j<0)y=i; else x=i; } p=y; return(0); } void Page::debug(void){ cout << "Page {" << endl; cout << " ndnm: " << (int)ndnm << endl; if(pdn==NULL)cout << " pdn: NULL" << endl; else cout << " pdn: " << (long)pdn << endl; for(int i=0;i<(int)ndnm;i++){ cout << i << " "; (this->pnd[i])->debug(); } cout << " }" << endl; } int stc_my(int &a,int &b,const char *str,const char *ptr) {register int i=(andnm = 1; (root->pnd)[0]=new Node(""); } int Btree::search(const char *str){ depth=-1; Page *pu=root; register int a=0,b=0,i,j; while(pu!=NULL){ depth++; pg[depth]=pu; j=(pu->search)(a,b,str,i); cnd[depth]=i; if(j==1)return(1); if(i==0)pu=pu->pdn; else pu=(pu->pnd)[i-1]->pdn; } return(0); } int Btree::insert(Node *nd){ int w,k; Page *pm,*pz; while((nd!=NULL)&&(depth)){ pm=pg[depth]; w=pm->ndnm; if(winsert(cnd[depth],nd,w); nd=NULL; } else { k=cnd[depth]; if(kpnd)[order-1])->pdn,order); pm->insert(k,nd,order); nd=pm->pnd[order]; nd->pdn=pz; pm->ndnm=order; } else if(k>order){ pz=new Page(pm,((pm->pnd)[order])->pdn,order+1); pz->insert(k-order-1,nd,order-1); nd=pm->pnd[order]; nd->pdn=pz; pm->ndnm=order; } else { pz=new Page(pm,nd->pdn,order); nd->pdn=pz; pm->ndnm=order; } } depth--; } if(nd!=NULL){ pm=pg[depth]; w=pm->ndnm; if(winsert(cnd[depth],nd,w); else { root=new Page(); root->pdn=pm; k=cnd[depth]; if(kpnd)[order-1])->pdn,order); pm->insert(k,nd,order); (root->pnd)[0]=pm->pnd[order]; ((root->pnd)[0])->pdn=pz; root->ndnm=1; pm->ndnm=order; } else if(k>order){ pz=new Page(pm,((pm->pnd)[order])->pdn,order+1); pz->insert(k-order-1,nd,order-1); (root->pnd)[0]=pm->pnd[order]; ((root->pnd)[0])->pdn=pz; root->ndnm=1; pm->ndnm=order; } else { pz=new Page(pm,nd->pdn,order); (root->pnd)[0]=nd; nd->pdn=pz; root->ndnm=1; pm->ndnm=order; } } } return(1); } void Btree::node_first(void){ depth=0; pg[depth]=root; cnd[depth]=0; Page *pm; while((pm=(pg[depth]->pdn))!=NULL){ depth++; pg[depth]=pm; cnd[depth]=0; } } int Btree::node_next(){ int i=cnd[depth]; Page *pd=((pg[depth]->pnd)[i])->pdn; if(pd!=NULL){ (cnd[depth])++; depth++; pg[depth]=pd; cnd[depth]=0; while((pd=(pg[depth]->pdn))!=NULL){ depth++; pg[depth]=pd; cnd[depth]=0; } } else { cnd[depth]=++i; while((depth>=1)&&(i==(pg[depth]->ndnm))){depth--;i=cnd[depth];} if((depth==0)&&(i==(pg[depth]->ndnm)))depth--; if(depth<0)return(0); } return(1); } char *Btree::show_str(){ return(((pg[depth]->pnd)[cnd[depth]])->str); } void *Btree::give_ptr(){ return(((pg[depth]->pnd)[cnd[depth]])->rel); } void Btree::set_ptr(void *dtr){ ((pg[depth]->pnd)[cnd[depth]])->rel=dtr; } Btree::~Btree(){ int pflag=get_qflag(); long k=0; if (copy) return; // only delete original if(!iclean){ node_first(); int i=depth,j; do{ j=node_next(); if(depthdepth){ delete pg[i]; i--; mark(pflag,++k,1000,"pages deleted"); } } else i=depth; } while(j); } else { node_first(); int i=depth,j; do{ j=node_next(); if(depthdepth){ pg[i]->clean(); delete pg[i]; i--; mark(pflag,++k,1000,"pages deleted"); } } else i=depth; } while(j); } } long Btree::list_write(ofstream &fout){ int pflag=get_qflag(); long ct=0; node_first(); while(node_next()){ fout << show_str() << endl; mark(pflag,++ct,1000,"strings written"); } fout.close(); return((int)fout.good()); } Btree::Btree(ifstream &fin){ copy=false; char cnam[256]; int pflag=get_qflag(); depth=0; pg[0]=root=new Page(); cnd[0]=root->ndnm = 1; (root->pnd)[0]=new Node(""); Node *pno; long ct=0; while(get_string(cnam,fin,'\n')){ pno = new Node(cnam); add(pno); mark(pflag,++ct,10000,"strings read"); } fin.close(); } int Btree::add(Node *nd){ int w,k,dp; Page *pm,*pz; dp=depth; //uses dp in place of depth in insert. while((nd!=NULL)&&(dp)){ pm=pg[dp]; w=pm->ndnm; if(winsert(cnd[dp],nd,w); nd=NULL; (cnd[dp])++; //variation from insert. } else { k=cnd[dp]; if(kpnd)[order-1])->pdn,order); pm->insert(k,nd,order); nd=pm->pnd[order]; nd->pdn=pz; pm->ndnm=order; } else if(k>order){ pz=new Page(pm,((pm->pnd)[order])->pdn,order+1); pz->insert(k-order-1,nd,order-1); nd=pm->pnd[order]; nd->pdn=pz; pm->ndnm=order; } else { pz=new Page(pm,nd->pdn,order); nd->pdn=pz; pm->ndnm=order; } pg[dp]=pz; //2 lines of variation from insert. cnd[dp]=order; } dp--; } if(nd!=NULL){ pm=pg[dp]; w=pm->ndnm; if(winsert(cnd[dp],nd,w); (cnd[dp])++; //variation from insert. } else { root=new Page(); root->pdn=pm; k=cnd[dp]; if(kpnd)[order-1])->pdn,order); pm->insert(k,nd,order); (root->pnd)[0]=pm->pnd[order]; ((root->pnd)[0])->pdn=pz; root->ndnm=1; pm->ndnm=order; } else if(k>order){ pz=new Page(pm,((pm->pnd)[order])->pdn,order+1); pz->insert(k-order-1,nd,order-1); (root->pnd)[0]=pm->pnd[order]; ((root->pnd)[0])->pdn=pz; root->ndnm=1; pm->ndnm=order; } else { pz=new Page(pm,nd->pdn,order); (root->pnd)[0]=nd; nd->pdn=pz; root->ndnm=1; pm->ndnm=order; } next_empty(); //variation from insert. } } return(1); } void Btree::next_empty(){ depth=0; pg[depth]=root; int i=cnd[depth]=root->ndnm; Page *pm; while((pm=((pg[depth]->pnd)[i-1])->pdn)!=NULL){ depth++; pg[depth]=pm; i=cnd[depth]=pm->ndnm; } } Str_str::Str_str() : Btree() { } Str_str::~Str_str(){ if(copy)return; this->node_first(); while(this->node_next())delete [] (char*)this->give_ptr(); } void Str_str::add_pair(const char *one,const char *two){ Node *pnd; if(search(one)){ cout << "Duplicate string in keys list = " << one << endl; exit(0); } else { int i=strlen(two); char *st=new char[i+1]; strcpy(st,two); pnd=new Node(one,(void *)st); add(pnd); } } char *Str_str::match(const char *one){ if(search(one)){ return((char*)give_ptr()); } else { cout << "String not a key = " << one << endl; exit(0); } } List::List() : Btree() { cnt_key=0; } List::~List(){ } void List::add_key(const char *str){ Node *pnd; if(!search(str)){ pnd=new Node(str); add(pnd); } } void List::add_key_count(const char *str){ Node *pnd; if(!search(str)){ pnd=new Node(str); add(pnd); cnt_key++; } } void List::addp_key_count(char *str){ Node *pnd; if(!search(str)){ pnd=new Node; pnd->str=str; add(pnd); cnt_key++; } } Num_num::Num_num() : Btree() { } Num_num::~Num_num(){ if(copy)return; this->node_first(); while(this->node_next())delete (long*)this->give_ptr(); } void Num_num::add_pair(long i,long j){ Node *pnd; char cnam[256]; long_str(cnam,i); if(!search(cnam)){ long *st=new long; *st=j; pnd=new Node(cnam,(void *)st); add(pnd); } } long Num_num::match(long i){ char cnam[256]; long_str(cnam,i); if(search(cnam)){ return(*((long*)give_ptr())); } else return(LNEG); } Count::Count() : List() { total=0; } Count::~Count(){ if(copy)return; long *pk; this->node_first(); while(this->node_next()){ pk=(long*)(this->give_ptr()); if(pk)delete pk; } } void Count::add_count(const char *pch,long n){ long *ppt; Node *np; total+=n; if(this->search(pch)==0){ ppt = new long; (*ppt) =n; np=new Node(pch,(void*)ppt); this->insert(np); } else { (*(long*) this->give_ptr())+=n; } } void Count::add_countz(const char *pch,long n){ long *ppt; Node *np; if(this->search(pch)==0){ ppt = new long; (*ppt) =n; np=new Node(pch,(void*)ppt); this->insert(np); cnt_key++; } else { (*(long*) this->give_ptr())+=n; } } void Count::add_count2(const char *pch,long n){ long *ppt; Node *np; total+=n; if(this->search(pch)==0){ ppt = new long; (*ppt) =n; np=new Node(pch,(void*)ppt); this->insert(np); cnt_key++; } else { (*(long*) this->give_ptr())+=n; } } void Count::addp_count2(char *pch,long n){ long *ppt; Node *np; total+=n; if(this->search(pch)==0){ ppt = new long; (*ppt) =n; np=new Node; np->str=pch; np->rel=ppt; this->insert(np); cnt_key++; } else { (*(long*) this->give_ptr())+=n; } } void Count::correct(const char *pch,long n){ if(this->search(pch)){ (*(long*) this->give_ptr())=n; } } long Count::count(const char *pch){ if(this->search(pch)==0){ return(0); } else { return(*((long*) this->give_ptr())); } } long Count::count(void){ return(*((long*) this->give_ptr())); } void Count::max_count(const char *pch,long n){ long *ppt,i; Node *np; total+=n; if(!search(pch)){ ppt = new long; (*ppt) =n; np=new Node(pch,(void*)ppt); this->insert(np); } else { ppt=(long*)give_ptr(); if(*pptinsert(np); cnt_key++; } else { ppt=(long*)give_ptr(); if(*pptstr=pch; np->rel=ppt; this->insert(np); cnt_key++; } else { ppt=(long*)give_ptr(); if(*pptinsert(np); } else { ppt=(long*)give_ptr(); if(*ppt>n)*ppt=n; } } void Count::min_count2(const char *pch,long n){ long *ppt,i; Node *np; total+=n; if(!search(pch)){ ppt = new long; (*ppt) =n; np=new Node(pch,(void*)ppt); this->insert(np); cnt_key++; } else { ppt=(long*)give_ptr(); if(*ppt>n)*ppt=n; } } void Count::minp_count2(char *pch,long n){ long *ppt,i; Node *np; total+=n; if(!search(pch)){ ppt = new long; (*ppt) =n; np=new Node; np->str=pch; np->rel=ppt; this->insert(np); cnt_key++; } else { ppt=(long*)give_ptr(); if(*ppt>n)*ppt=n; } } //FCount (float count tree) FCount::FCount() : List() { total=0; } FCount::~FCount(){ if(copy)return; float *pk; this->node_first(); while(this->node_next()){ pk=(float*)(this->give_ptr()); if(pk)delete pk; } } void FCount::Copy(FCount &Fc){ char *pch; float *xx,*zz; Node *pN; pg[0]=root; cnd[0]=root->ndnm; Fc.node_first(); while(Fc.node_next()){ pch=Fc.show_str(); xx=(float*)Fc.give_ptr(); zz=new float; *zz=*xx; pN=new Node(pch,(void*)zz); add(pN); } } void FCount::add_count(const char *pch,float z){ float *ppt; Node *np; total+=z; if(this->search(pch)==0){ ppt = new float; (*ppt) =z; np=new Node(pch,(void*)ppt); this->insert(np); } else { (*(float*) this->give_ptr())+=z; } } void FCount::add_count2(const char *pch,float z){ float *ppt; Node *np; total+=z; if(this->search(pch)==0){ ppt = new float; (*ppt) =z; np=new Node(pch,(void*)ppt); this->insert(np); cnt_key++; } else { (*(float*) this->give_ptr())+=z; } } void FCount::addp_count2(char *pch,float z){ float *ppt; Node *np; total+=z; if(this->search(pch)==0){ ppt = new float; (*ppt) =z; np=new Node; np->str=pch; np->rel=ppt; this->insert(np); cnt_key++; } else { (*(float*) this->give_ptr())+=z; } } float FCount::count(const char *pch){ if(this->search(pch)==0){ return(0); } else { return(*((float*) this->give_ptr())); } } float FCount::count(void){ return(*((float*) this->give_ptr())); } //DCount (double precision count tree) DCount::DCount() : List() { total=0; } DCount::~DCount(){ if(copy)return; double *pk; this->node_first(); while(this->node_next()){ pk=(double*)(this->give_ptr()); if(pk)delete pk; } } void DCount::Copy(DCount &Dc){ char *pch; double *xx,*zz; Node *pN; pg[0]=root; cnd[0]=root->ndnm; Dc.node_first(); while(Dc.node_next()){ pch=Dc.show_str(); xx=(double*)Dc.give_ptr(); zz=new double; *zz=*xx; pN=new Node(pch,(void*)zz); add(pN); } } void DCount::add_count(const char *pch,double z){ double *ppt; Node *np; total+=z; if(this->search(pch)==0){ ppt = new double; (*ppt) =z; np=new Node(pch,(void*)ppt); this->insert(np); } else { (*(double*) this->give_ptr())+=z; } } void DCount::add_count2(const char *pch,double z){ double *ppt; Node *np; total+=z; if(this->search(pch)==0){ ppt = new double; (*ppt) =z; np=new Node(pch,(void*)ppt); this->insert(np); cnt_key++; } else { (*(double*) this->give_ptr())+=z; } } void DCount::addp_count2(char *pch,double z){ double *ppt; Node *np; total+=z; if(this->search(pch)==0){ ppt = new double; (*ppt) =z; np=new Node; np->str=pch; np->rel=ppt; this->insert(np); cnt_key++; } else { (*(double*) this->give_ptr())+=z; } } double DCount::count(const char *pch){ if(this->search(pch)==0){ return(0); } else { return(*((double*) this->give_ptr())); } } double DCount::count(void){ return(*((double*) this->give_ptr())); } void DCount::max_count(const char *pch,double z){ double *ppt; Node *np; total+=z; if(!search(pch)){ ppt = new double; (*ppt) =z; np=new Node(pch,(void*)ppt); this->insert(np); } else { ppt=(double*)give_ptr(); if(*pptinsert(np); cnt_key++; } else { ppt=(double*)give_ptr(); if(*pptstr=pch; np->rel=ppt; this->insert(np); cnt_key++; } else { ppt=(double*)give_ptr(); if(*pptinsert(np); } else { ppt=(double*)give_ptr(); if(*ppt>z)*ppt=z; } } void DCount::min_count2(const char *pch,double z){ double *ppt; Node *np; total+=z; if(!search(pch)){ ppt = new double; (*ppt) =z; np=new Node(pch,(void*)ppt); this->insert(np); cnt_key++; } else { ppt=(double*)give_ptr(); if(*ppt>z)*ppt=z; } } void DCount::minp_count2(char *pch,double z){ double *ppt; Node *np; total+=z; if(!search(pch)){ ppt = new double; (*ppt) =z; np=new Node; np->str=pch; np->rel=ppt; this->insert(np); cnt_key++; } else { ppt=(double*)give_ptr(); if(*ppt>z)*ppt=z; } } void DCount::debug(void){ node_first(); while(node_next()){ cout << count() << " " << show_str() << endl; } } //Partial Match Partial_match::Partial_match() : Count() { } Partial_match::~Partial_match(){ } void Partial_match::long_match(char *str,List &Lst){ char *pch; while(*str!='\0'){ if(this->search_long(str)){ pch=this->show_str(); Lst.add_key_count(pch); } if((pch=strchr(str,' '))!=NULL)str=pch+1; else str=str+strlen(str); } } void Partial_match::local_match(char *str,List &Lst){ char *pch; int i,j; if(*str!='\0'){ if(this->search_long(str)){ pch=this->show_str(); Lst.add_key_count(pch); i=strlen(pch)-1; while(0search(str); *(str+i)=' '; if(j){ pch=this->show_str(); Lst.add_key_count(pch); } i--; } } } } } void Partial_match::all_match(char *str,List &Lst){ char *pch; int i,j; while(*str!='\0'){ if(this->search_long(str)){ pch=this->show_str(); Lst.add_key_count(pch); i=strlen(pch)-1; while(0search(str); *(str+i)=' '; if(j){ pch=this->show_str(); Lst.add_key_count(pch); } i--; } } } if((pch=strchr(str,' '))!=NULL)str=pch+1; else str=str+strlen(str); } } void Partial_match::long_match(char *str,Count &Cnt,long n){ char *pch; while(*str!='\0'){ if(this->search_long(str)){ pch=this->show_str(); Cnt.add_count2(pch,n); } if((pch=strchr(str,' '))!=NULL)str=pch+1; else str=str+strlen(str); } } void Partial_match::local_match(char *str,Count &Cnt,long n){ char *pch; int i,j; if(*str!='\0'){ if(this->search_long(str)){ pch=this->show_str(); Cnt.add_count2(pch,n); i=strlen(pch)-1; while(0search(str); *(str+i)=' '; if(j){ pch=this->show_str(); Cnt.add_count2(pch,n); } i--; } } } } } void Partial_match::all_match(char *str,Count &Cnt,long n){ char *pch; int i,j; while(*str!='\0'){ if(this->search_long(str)){ pch=this->show_str(); Cnt.add_count2(pch,n); i=strlen(pch)-1; while(0search(str); *(str+i)=' '; if(j){ pch=this->show_str(); Cnt.add_count2(pch,n); } i--; } } } if((pch=strchr(str,' '))!=NULL)str=pch+1; else str=str+strlen(str); } } int Partial_match::search_long(char *str){ int a=0,b=0,i,j; len=strlen(str); if(this->step_one(a,b,str))return(1); i=(asearch(str); *(str+i)=' '; if(j)return(1); i--; } } if(cln_o){ depth=depth_o; cnd[depth]=index_o; return(1); } else return(0); } int Partial_match::step_one(int &a,int &b,char *str){ char c; cln_o=0; cln=0; while((c=*(str+cln))&&c!=32)cln++; *(str+cln)='\0'; depth=-1; Page *pu=root; int i,j; while(pu!=NULL){ depth++; pg[depth]=pu; j=(pu->search)(a,b,str,i,this); cnd[depth]=i; if(j==1)return(1); if(i==0)pu=pu->pdn; else pu=(pu->pnd)[i-1]->pdn; } if(cln