diff --git a/CRF/java/.am b/CRF/java/.am index 0608003b7dc378ad7a8674445fd2dbf8094b1a81..2c3332e83629c844a656374dbc551e7171724454 100644 --- a/CRF/java/.am +++ b/CRF/java/.am @@ -1,27 +1,27 @@ -TARGET=MeCab -JAVAC=javac -JAVA=java -JAR=jar -CXX=c++ -INCLUDE=/usr/lib/jvm/java-6-openjdk/include - -PACKAGE=org/chasen/mecab - -LIBS=`mecab-config --libs` -INC=`mecab-config --cflags` -I$(INCLUDE) -I$(INCLUDE)/linux - -all: - $(CXX) -O3 -c -fpic $(TARGET)_wrap.cxx $(INC) - $(CXX) -shared $(TARGET)_wrap.o -o lib$(TARGET).so $(LIBS) - $(JAVAC) $(PACKAGE)/*.java - $(JAVAC) test.java - $(JAR) cfv $(TARGET).jar $(PACKAGE)/*.class - -test: - env LD_LIBRARY_PATH=. $(JAVA) test - -clean: - rm -fr *.jar *.o *.so *.class $(PACKAGE)/*.class - -cleanall: - rm -fr $(TARGET).java *.cxx +TARGET=MeCab +JAVAC=javac +JAVA=java +JAR=jar +CXX=c++ +INCLUDE=/usr/lib/jvm/java-6-openjdk/include + +PACKAGE=org/chasen/mecab + +LIBS=`mecab-config --libs` +INC=`mecab-config --cflags` -I$(INCLUDE) -I$(INCLUDE)/linux + +all: + $(CXX) -O3 -c -fpic $(TARGET)_wrap.cxx $(INC) + $(CXX) -shared $(TARGET)_wrap.o -o lib$(TARGET).so $(LIBS) + $(JAVAC) $(PACKAGE)/*.java + $(JAVAC) test.java + $(JAR) cfv $(TARGET).jar $(PACKAGE)/*.class + +test: + env LD_LIBRARY_PATH=. $(JAVA) test + +clean: + rm -fr *.jar *.o *.so *.class $(PACKAGE)/*.class + +cleanall: + rm -fr $(TARGET).java *.cxx diff --git a/CRF/java/org/chasen/crfpp/Model.java b/CRF/java/org/chasen/crfpp/Model.java index 2ec8ad6b5d0d483d21e4d216498914b5c5fc0709..8e4f880f42022e35573b980bfc6a3a9f5843f799 100644 --- a/CRF/java/org/chasen/crfpp/Model.java +++ b/CRF/java/org/chasen/crfpp/Model.java @@ -1,51 +1,51 @@ -/* ---------------------------------------------------------------------------- - * This file was automatically generated by SWIG (http://www.swig.org). - * Version 1.3.40 - * - * Do not make changes to this file unless you know what you are doing--modify - * the SWIG interface file instead. - * ----------------------------------------------------------------------------- */ - -package org.chasen.crfpp; - -public class Model { - private long swigCPtr; - protected boolean swigCMemOwn; - - protected Model(long cPtr, boolean cMemoryOwn) { - swigCMemOwn = cMemoryOwn; - swigCPtr = cPtr; - } - - protected static long getCPtr(Model obj) { - return (obj == null) ? 0 : obj.swigCPtr; - } - - protected void finalize() { - delete(); - } - - public synchronized void delete() { - if (swigCPtr != 0) { - if (swigCMemOwn) { - swigCMemOwn = false; - CRFPPJNI.delete_Model(swigCPtr); - } - swigCPtr = 0; - } - } - - public Tagger createTagger() { - long cPtr = CRFPPJNI.Model_createTagger(swigCPtr, this); - return (cPtr == 0) ? null : new Tagger(cPtr, false); - } - - public String what() { - return CRFPPJNI.Model_what(swigCPtr, this); - } - - public Model(String arg) { - this(CRFPPJNI.new_Model(arg), true); - } - -} +/* ---------------------------------------------------------------------------- + * This file was automatically generated by SWIG (http://www.swig.org). + * Version 1.3.40 + * + * Do not make changes to this file unless you know what you are doing--modify + * the SWIG interface file instead. + * ----------------------------------------------------------------------------- */ + +package org.chasen.crfpp; + +public class Model { + private long swigCPtr; + protected boolean swigCMemOwn; + + protected Model(long cPtr, boolean cMemoryOwn) { + swigCMemOwn = cMemoryOwn; + swigCPtr = cPtr; + } + + protected static long getCPtr(Model obj) { + return (obj == null) ? 0 : obj.swigCPtr; + } + + protected void finalize() { + delete(); + } + + public synchronized void delete() { + if (swigCPtr != 0) { + if (swigCMemOwn) { + swigCMemOwn = false; + CRFPPJNI.delete_Model(swigCPtr); + } + swigCPtr = 0; + } + } + + public Tagger createTagger() { + long cPtr = CRFPPJNI.Model_createTagger(swigCPtr, this); + return (cPtr == 0) ? null : new Tagger(cPtr, false); + } + + public String what() { + return CRFPPJNI.Model_what(swigCPtr, this); + } + + public Model(String arg) { + this(CRFPPJNI.new_Model(arg), true); + } + +} diff --git a/CRF/perl/Makefile.old b/CRF/perl/Makefile.old index c66615cf2588d54aa540d85c7716175cbd333766..1eef3c46b868e14a74c86cf94dce322594a4ddb3 100644 --- a/CRF/perl/Makefile.old +++ b/CRF/perl/Makefile.old @@ -1,931 +1,931 @@ -# This Makefile is for the CRFPP extension to perl. -# -# It was generated automatically by MakeMaker version -# 6.56 (Revision: 65600) from the contents of -# Makefile.PL. Don't edit this file, edit Makefile.PL instead. -# -# ANY CHANGES MADE HERE WILL BE LOST! -# -# MakeMaker ARGV: () -# - -# MakeMaker Parameters: - -# BUILD_REQUIRES => { } -# CC => q[c++] -# INC => q[] -# LD => q[c++] -# LIBS => q[-lpthread -lcrfpp] -# NAME => q[CRFPP] -# OBJECT => q[CRFPP_wrap.o] -# PREREQ_PM => { } - -# --- MakeMaker post_initialize section: - - -# --- MakeMaker const_config section: - -# These definitions are from config.sh (via /usr/lib/perl/5.12/Config.pm). -# They may have been overridden via Makefile.PL or on the command line. -AR = ar -CC = c++ -CCCDLFLAGS = -fPIC -CCDLFLAGS = -Wl,-E -DLEXT = so -DLSRC = dl_dlopen.xs -EXE_EXT = -FULL_AR = /usr/bin/ar -LD = c++ -LDDLFLAGS = -shared -O2 -g -L/usr/local/lib -fstack-protector -LDFLAGS = -fstack-protector -L/usr/local/lib -LIBC = -LIB_EXT = .a -OBJ_EXT = .o -OSNAME = linux -OSVERS = 2.6.24-28-server -RANLIB = : -SITELIBEXP = /usr/local/share/perl/5.12.4 -SITEARCHEXP = /usr/local/lib/perl/5.12.4 -SO = so -VENDORARCHEXP = /usr/lib/perl5 -VENDORLIBEXP = /usr/share/perl5 - - -# --- MakeMaker constants section: -AR_STATIC_ARGS = cr -DIRFILESEP = / -DFSEP = $(DIRFILESEP) -NAME = CRFPP -NAME_SYM = CRFPP -VERSION = -VERSION_MACRO = VERSION -VERSION_SYM = -DEFINE_VERSION = -D$(VERSION_MACRO)=\"$(VERSION)\" -XS_VERSION = -XS_VERSION_MACRO = XS_VERSION -XS_DEFINE_VERSION = -D$(XS_VERSION_MACRO)=\"$(XS_VERSION)\" -INST_ARCHLIB = blib/arch -INST_SCRIPT = blib/script -INST_BIN = blib/bin -INST_LIB = blib/lib -INST_MAN1DIR = blib/man1 -INST_MAN3DIR = blib/man3 -MAN1EXT = 1p -MAN3EXT = 3pm -INSTALLDIRS = site -DESTDIR = -PREFIX = /usr -PERLPREFIX = $(PREFIX) -SITEPREFIX = $(PREFIX)/local -VENDORPREFIX = $(PREFIX) -INSTALLPRIVLIB = $(PERLPREFIX)/share/perl/5.12 -DESTINSTALLPRIVLIB = $(DESTDIR)$(INSTALLPRIVLIB) -INSTALLSITELIB = $(SITEPREFIX)/share/perl/5.12.4 -DESTINSTALLSITELIB = $(DESTDIR)$(INSTALLSITELIB) -INSTALLVENDORLIB = $(VENDORPREFIX)/share/perl5 -DESTINSTALLVENDORLIB = $(DESTDIR)$(INSTALLVENDORLIB) -INSTALLARCHLIB = $(PERLPREFIX)/lib/perl/5.12 -DESTINSTALLARCHLIB = $(DESTDIR)$(INSTALLARCHLIB) -INSTALLSITEARCH = $(SITEPREFIX)/lib/perl/5.12.4 -DESTINSTALLSITEARCH = $(DESTDIR)$(INSTALLSITEARCH) -INSTALLVENDORARCH = $(VENDORPREFIX)/lib/perl5 -DESTINSTALLVENDORARCH = $(DESTDIR)$(INSTALLVENDORARCH) -INSTALLBIN = $(PERLPREFIX)/bin -DESTINSTALLBIN = $(DESTDIR)$(INSTALLBIN) -INSTALLSITEBIN = $(SITEPREFIX)/bin -DESTINSTALLSITEBIN = $(DESTDIR)$(INSTALLSITEBIN) -INSTALLVENDORBIN = $(VENDORPREFIX)/bin -DESTINSTALLVENDORBIN = $(DESTDIR)$(INSTALLVENDORBIN) -INSTALLSCRIPT = $(PERLPREFIX)/bin -DESTINSTALLSCRIPT = $(DESTDIR)$(INSTALLSCRIPT) -INSTALLSITESCRIPT = $(SITEPREFIX)/bin -DESTINSTALLSITESCRIPT = $(DESTDIR)$(INSTALLSITESCRIPT) -INSTALLVENDORSCRIPT = $(VENDORPREFIX)/bin -DESTINSTALLVENDORSCRIPT = $(DESTDIR)$(INSTALLVENDORSCRIPT) -INSTALLMAN1DIR = $(PERLPREFIX)/share/man/man1 -DESTINSTALLMAN1DIR = $(DESTDIR)$(INSTALLMAN1DIR) -INSTALLSITEMAN1DIR = $(SITEPREFIX)/man/man1 -DESTINSTALLSITEMAN1DIR = $(DESTDIR)$(INSTALLSITEMAN1DIR) -INSTALLVENDORMAN1DIR = $(VENDORPREFIX)/share/man/man1 -DESTINSTALLVENDORMAN1DIR = $(DESTDIR)$(INSTALLVENDORMAN1DIR) -INSTALLMAN3DIR = $(PERLPREFIX)/share/man/man3 -DESTINSTALLMAN3DIR = $(DESTDIR)$(INSTALLMAN3DIR) -INSTALLSITEMAN3DIR = $(SITEPREFIX)/man/man3 -DESTINSTALLSITEMAN3DIR = $(DESTDIR)$(INSTALLSITEMAN3DIR) -INSTALLVENDORMAN3DIR = $(VENDORPREFIX)/share/man/man3 -DESTINSTALLVENDORMAN3DIR = $(DESTDIR)$(INSTALLVENDORMAN3DIR) -PERL_LIB = /usr/share/perl/5.12 -PERL_ARCHLIB = /usr/lib/perl/5.12 -LIBPERL_A = libperl.a -FIRST_MAKEFILE = Makefile -MAKEFILE_OLD = Makefile.old -MAKE_APERL_FILE = Makefile.aperl -PERLMAINCC = $(CC) -PERL_INC = /usr/lib/perl/5.12/CORE -PERL = /usr/bin/perl -FULLPERL = /usr/bin/perl -ABSPERL = $(PERL) -PERLRUN = $(PERL) -FULLPERLRUN = $(FULLPERL) -ABSPERLRUN = $(ABSPERL) -PERLRUNINST = $(PERLRUN) "-I$(INST_ARCHLIB)" "-I$(INST_LIB)" -FULLPERLRUNINST = $(FULLPERLRUN) "-I$(INST_ARCHLIB)" "-I$(INST_LIB)" -ABSPERLRUNINST = $(ABSPERLRUN) "-I$(INST_ARCHLIB)" "-I$(INST_LIB)" -PERL_CORE = 0 -PERM_DIR = 755 -PERM_RW = 644 -PERM_RWX = 755 - -MAKEMAKER = /usr/share/perl/5.12/ExtUtils/MakeMaker.pm -MM_VERSION = 6.56 -MM_REVISION = 65600 - -# FULLEXT = Pathname for extension directory (eg Foo/Bar/Oracle). -# BASEEXT = Basename part of FULLEXT. May be just equal FULLEXT. (eg Oracle) -# PARENT_NAME = NAME without BASEEXT and no trailing :: (eg Foo::Bar) -# DLBASE = Basename part of dynamic library. May be just equal BASEEXT. -MAKE = make -FULLEXT = CRFPP -BASEEXT = CRFPP -PARENT_NAME = -DLBASE = $(BASEEXT) -VERSION_FROM = -INC = -OBJECT = CRFPP_wrap$(OBJ_EXT) -LDFROM = $(OBJECT) -LINKTYPE = dynamic -BOOTDEP = - -# Handy lists of source code files: -XS_FILES = -C_FILES = CRFPP_wrap.cxx -O_FILES = CRFPP_wrap.o -H_FILES = -MAN1PODS = -MAN3PODS = - -# Where is the Config information that we are using/depend on -CONFIGDEP = $(PERL_ARCHLIB)$(DFSEP)Config.pm $(PERL_INC)$(DFSEP)config.h - -# Where to build things -INST_LIBDIR = $(INST_LIB) -INST_ARCHLIBDIR = $(INST_ARCHLIB) - -INST_AUTODIR = $(INST_LIB)/auto/$(FULLEXT) -INST_ARCHAUTODIR = $(INST_ARCHLIB)/auto/$(FULLEXT) - -INST_STATIC = $(INST_ARCHAUTODIR)/$(BASEEXT)$(LIB_EXT) -INST_DYNAMIC = $(INST_ARCHAUTODIR)/$(DLBASE).$(DLEXT) -INST_BOOT = $(INST_ARCHAUTODIR)/$(BASEEXT).bs - -# Extra linker info -EXPORT_LIST = -PERL_ARCHIVE = -PERL_ARCHIVE_AFTER = - - -TO_INST_PM = CRFPP.pm - -PM_TO_BLIB = CRFPP.pm \ - $(INST_LIB)/CRFPP.pm - - -# --- MakeMaker platform_constants section: -MM_Unix_VERSION = 6.56 -PERL_MALLOC_DEF = -DPERL_EXTMALLOC_DEF -Dmalloc=Perl_malloc -Dfree=Perl_mfree -Drealloc=Perl_realloc -Dcalloc=Perl_calloc - - -# --- MakeMaker tool_autosplit section: -# Usage: $(AUTOSPLITFILE) FileToSplit AutoDirToSplitInto -AUTOSPLITFILE = $(ABSPERLRUN) -e 'use AutoSplit; autosplit($$ARGV[0], $$ARGV[1], 0, 1, 1)' -- - - - -# --- MakeMaker tool_xsubpp section: - -XSUBPPDIR = /usr/share/perl/5.12/ExtUtils -XSUBPP = $(XSUBPPDIR)$(DFSEP)xsubpp -XSUBPPRUN = $(PERLRUN) $(XSUBPP) -XSPROTOARG = -XSUBPPDEPS = /usr/share/perl/5.12/ExtUtils/typemap $(XSUBPP) -XSUBPPARGS = -typemap /usr/share/perl/5.12/ExtUtils/typemap -XSUBPP_EXTRA_ARGS = - - -# --- MakeMaker tools_other section: -SHELL = /bin/sh -CHMOD = chmod -CP = cp -MV = mv -NOOP = $(TRUE) -NOECHO = @ -RM_F = rm -f -RM_RF = rm -rf -TEST_F = test -f -TOUCH = touch -UMASK_NULL = umask 0 -DEV_NULL = > /dev/null 2>&1 -MKPATH = $(ABSPERLRUN) -MExtUtils::Command -e 'mkpath' -- -EQUALIZE_TIMESTAMP = $(ABSPERLRUN) -MExtUtils::Command -e 'eqtime' -- -FALSE = false -TRUE = true -ECHO = echo -ECHO_N = echo -n -UNINST = 0 -VERBINST = 0 -MOD_INSTALL = $(ABSPERLRUN) -MExtUtils::Install -e 'install([ from_to => {@ARGV}, verbose => '\''$(VERBINST)'\'', uninstall_shadows => '\''$(UNINST)'\'', dir_mode => '\''$(PERM_DIR)'\'' ]);' -- -DOC_INSTALL = $(ABSPERLRUN) -MExtUtils::Command::MM -e 'perllocal_install' -- -UNINSTALL = $(ABSPERLRUN) -MExtUtils::Command::MM -e 'uninstall' -- -WARN_IF_OLD_PACKLIST = $(ABSPERLRUN) -MExtUtils::Command::MM -e 'warn_if_old_packlist' -- -MACROSTART = -MACROEND = -USEMAKEFILE = -f -FIXIN = $(ABSPERLRUN) -MExtUtils::MY -e 'MY->fixin(shift)' -- - - -# --- MakeMaker makemakerdflt section: -makemakerdflt : all - $(NOECHO) $(NOOP) - - -# --- MakeMaker dist section: -TAR = tar -TARFLAGS = cvf -ZIP = zip -ZIPFLAGS = -r -COMPRESS = gzip --best -SUFFIX = .gz -SHAR = shar -PREOP = $(NOECHO) $(NOOP) -POSTOP = $(NOECHO) $(NOOP) -TO_UNIX = $(NOECHO) $(NOOP) -CI = ci -u -RCS_LABEL = rcs -Nv$(VERSION_SYM): -q -DIST_CP = best -DIST_DEFAULT = tardist -DISTNAME = CRFPP -DISTVNAME = CRFPP- - - -# --- MakeMaker macro section: - - -# --- MakeMaker depend section: - - -# --- MakeMaker cflags section: - -CCFLAGS = -D_REENTRANT -D_GNU_SOURCE -DDEBIAN -fno-strict-aliasing -pipe -fstack-protector -I/usr/local/include -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -OPTIMIZE = -O2 -g -PERLTYPE = -MPOLLUTE = - - -# --- MakeMaker const_loadlibs section: - -# CRFPP might depend on some other libraries: -# See ExtUtils::Liblist for details -# -EXTRALIBS = -lcrfpp -LDLOADLIBS = -lpthread -lcrfpp -BSLOADLIBS = - - -# --- MakeMaker const_cccmd section: -CCCMD = $(CC) -c $(PASTHRU_INC) $(INC) \ - $(CCFLAGS) $(OPTIMIZE) \ - $(PERLTYPE) $(MPOLLUTE) $(DEFINE_VERSION) \ - $(XS_DEFINE_VERSION) - -# --- MakeMaker post_constants section: - - -# --- MakeMaker pasthru section: - -PASTHRU = LIBPERL_A="$(LIBPERL_A)"\ - LINKTYPE="$(LINKTYPE)"\ - OPTIMIZE="$(OPTIMIZE)"\ - PREFIX="$(PREFIX)"\ - PASTHRU_INC="$(PASTHRU_INC)" - - -# --- MakeMaker special_targets section: -.SUFFIXES : .xs .c .C .cpp .i .s .cxx .cc $(OBJ_EXT) - -.PHONY: all config static dynamic test linkext manifest blibdirs clean realclean disttest distdir - - - -# --- MakeMaker c_o section: - -.c.i: - cc -E -c $(PASTHRU_INC) $(INC) \ - $(CCFLAGS) $(OPTIMIZE) \ - $(PERLTYPE) $(MPOLLUTE) $(DEFINE_VERSION) \ - $(XS_DEFINE_VERSION) $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) $*.c > $*.i - -.c.s: - $(CCCMD) -S $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) $*.c - -.c$(OBJ_EXT): - $(CCCMD) $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) $*.c - -.cpp$(OBJ_EXT): - $(CCCMD) $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) $*.cpp - -.cxx$(OBJ_EXT): - $(CCCMD) $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) $*.cxx - -.cc$(OBJ_EXT): - $(CCCMD) $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) $*.cc - -.C$(OBJ_EXT): - $(CCCMD) $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) $*.C - - -# --- MakeMaker xs_c section: - -.xs.c: - $(XSUBPPRUN) $(XSPROTOARG) $(XSUBPPARGS) $(XSUBPP_EXTRA_ARGS) $*.xs > $*.xsc && $(MV) $*.xsc $*.c - - -# --- MakeMaker xs_o section: - -.xs$(OBJ_EXT): - $(XSUBPPRUN) $(XSPROTOARG) $(XSUBPPARGS) $*.xs > $*.xsc && $(MV) $*.xsc $*.c - $(CCCMD) $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) $*.c - - -# --- MakeMaker top_targets section: -all :: pure_all manifypods - $(NOECHO) $(NOOP) - - -pure_all :: config pm_to_blib subdirs linkext - $(NOECHO) $(NOOP) - -subdirs :: $(MYEXTLIB) - $(NOECHO) $(NOOP) - -config :: $(FIRST_MAKEFILE) blibdirs - $(NOECHO) $(NOOP) - -help : - perldoc ExtUtils::MakeMaker - - -# --- MakeMaker blibdirs section: -blibdirs : $(INST_LIBDIR)$(DFSEP).exists $(INST_ARCHLIB)$(DFSEP).exists $(INST_AUTODIR)$(DFSEP).exists $(INST_ARCHAUTODIR)$(DFSEP).exists $(INST_BIN)$(DFSEP).exists $(INST_SCRIPT)$(DFSEP).exists $(INST_MAN1DIR)$(DFSEP).exists $(INST_MAN3DIR)$(DFSEP).exists - $(NOECHO) $(NOOP) - -# Backwards compat with 6.18 through 6.25 -blibdirs.ts : blibdirs - $(NOECHO) $(NOOP) - -$(INST_LIBDIR)$(DFSEP).exists :: Makefile.PL - $(NOECHO) $(MKPATH) $(INST_LIBDIR) - $(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_LIBDIR) - $(NOECHO) $(TOUCH) $(INST_LIBDIR)$(DFSEP).exists - -$(INST_ARCHLIB)$(DFSEP).exists :: Makefile.PL - $(NOECHO) $(MKPATH) $(INST_ARCHLIB) - $(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_ARCHLIB) - $(NOECHO) $(TOUCH) $(INST_ARCHLIB)$(DFSEP).exists - -$(INST_AUTODIR)$(DFSEP).exists :: Makefile.PL - $(NOECHO) $(MKPATH) $(INST_AUTODIR) - $(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_AUTODIR) - $(NOECHO) $(TOUCH) $(INST_AUTODIR)$(DFSEP).exists - -$(INST_ARCHAUTODIR)$(DFSEP).exists :: Makefile.PL - $(NOECHO) $(MKPATH) $(INST_ARCHAUTODIR) - $(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_ARCHAUTODIR) - $(NOECHO) $(TOUCH) $(INST_ARCHAUTODIR)$(DFSEP).exists - -$(INST_BIN)$(DFSEP).exists :: Makefile.PL - $(NOECHO) $(MKPATH) $(INST_BIN) - $(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_BIN) - $(NOECHO) $(TOUCH) $(INST_BIN)$(DFSEP).exists - -$(INST_SCRIPT)$(DFSEP).exists :: Makefile.PL - $(NOECHO) $(MKPATH) $(INST_SCRIPT) - $(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_SCRIPT) - $(NOECHO) $(TOUCH) $(INST_SCRIPT)$(DFSEP).exists - -$(INST_MAN1DIR)$(DFSEP).exists :: Makefile.PL - $(NOECHO) $(MKPATH) $(INST_MAN1DIR) - $(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_MAN1DIR) - $(NOECHO) $(TOUCH) $(INST_MAN1DIR)$(DFSEP).exists - -$(INST_MAN3DIR)$(DFSEP).exists :: Makefile.PL - $(NOECHO) $(MKPATH) $(INST_MAN3DIR) - $(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_MAN3DIR) - $(NOECHO) $(TOUCH) $(INST_MAN3DIR)$(DFSEP).exists - - - -# --- MakeMaker linkext section: - -linkext :: $(LINKTYPE) - $(NOECHO) $(NOOP) - - -# --- MakeMaker dlsyms section: - - -# --- MakeMaker dynamic section: - -dynamic :: $(FIRST_MAKEFILE) $(INST_DYNAMIC) $(INST_BOOT) - $(NOECHO) $(NOOP) - - -# --- MakeMaker dynamic_bs section: -BOOTSTRAP = $(BASEEXT).bs - -# As Mkbootstrap might not write a file (if none is required) -# we use touch to prevent make continually trying to remake it. -# The DynaLoader only reads a non-empty file. -$(BOOTSTRAP) : $(FIRST_MAKEFILE) $(BOOTDEP) $(INST_ARCHAUTODIR)$(DFSEP).exists - $(NOECHO) $(ECHO) "Running Mkbootstrap for $(NAME) ($(BSLOADLIBS))" - $(NOECHO) $(PERLRUN) \ - "-MExtUtils::Mkbootstrap" \ - -e "Mkbootstrap('$(BASEEXT)','$(BSLOADLIBS)');" - $(NOECHO) $(TOUCH) $@ - $(CHMOD) $(PERM_RW) $@ - -$(INST_BOOT) : $(BOOTSTRAP) $(INST_ARCHAUTODIR)$(DFSEP).exists - $(NOECHO) $(RM_RF) $@ - - $(CP) $(BOOTSTRAP) $@ - $(CHMOD) $(PERM_RW) $@ - - -# --- MakeMaker dynamic_lib section: - -# This section creates the dynamically loadable $(INST_DYNAMIC) -# from $(OBJECT) and possibly $(MYEXTLIB). -ARMAYBE = : -OTHERLDFLAGS = -INST_DYNAMIC_DEP = -INST_DYNAMIC_FIX = - -$(INST_DYNAMIC): $(OBJECT) $(MYEXTLIB) $(BOOTSTRAP) $(INST_ARCHAUTODIR)$(DFSEP).exists $(EXPORT_LIST) $(PERL_ARCHIVE) $(PERL_ARCHIVE_AFTER) $(INST_DYNAMIC_DEP) - $(RM_F) $@ - $(LD) $(LDDLFLAGS) $(LDFROM) $(OTHERLDFLAGS) -o $@ $(MYEXTLIB) \ - $(PERL_ARCHIVE) $(LDLOADLIBS) $(PERL_ARCHIVE_AFTER) $(EXPORT_LIST) \ - $(INST_DYNAMIC_FIX) - $(CHMOD) $(PERM_RWX) $@ - - -# --- MakeMaker static section: - -## $(INST_PM) has been moved to the all: target. -## It remains here for awhile to allow for old usage: "make static" -static :: $(FIRST_MAKEFILE) $(INST_STATIC) - $(NOECHO) $(NOOP) - - -# --- MakeMaker static_lib section: - -$(INST_STATIC) : $(OBJECT) $(MYEXTLIB) $(INST_ARCHAUTODIR)$(DFSEP).exists - $(RM_RF) $@ - $(FULL_AR) $(AR_STATIC_ARGS) $@ $(OBJECT) && $(RANLIB) $@ - $(CHMOD) $(PERM_RWX) $@ - $(NOECHO) $(ECHO) "$(EXTRALIBS)" > $(INST_ARCHAUTODIR)/extralibs.ld - - -# --- MakeMaker manifypods section: - -POD2MAN_EXE = $(PERLRUN) "-MExtUtils::Command::MM" -e pod2man "--" -POD2MAN = $(POD2MAN_EXE) - - -manifypods : pure_all - $(NOECHO) $(NOOP) - - - - -# --- MakeMaker processPL section: - - -# --- MakeMaker installbin section: - - -# --- MakeMaker subdirs section: - -# none - -# --- MakeMaker clean_subdirs section: -clean_subdirs : - $(NOECHO) $(NOOP) - - -# --- MakeMaker clean section: - -# Delete temporary files but do not touch installed files. We don't delete -# the Makefile here so a later make realclean still has a makefile to use. - -clean :: clean_subdirs - - $(RM_F) \ - *$(LIB_EXT) core \ - core.[0-9] $(INST_ARCHAUTODIR)/extralibs.all \ - core.[0-9][0-9] $(BASEEXT).bso \ - pm_to_blib.ts core.[0-9][0-9][0-9][0-9] \ - $(BASEEXT).x $(BOOTSTRAP) \ - perl$(EXE_EXT) tmon.out \ - *$(OBJ_EXT) pm_to_blib \ - $(INST_ARCHAUTODIR)/extralibs.ld blibdirs.ts \ - core.[0-9][0-9][0-9][0-9][0-9] *perl.core \ - core.*perl.*.? $(MAKE_APERL_FILE) \ - perl $(BASEEXT).def \ - core.[0-9][0-9][0-9] mon.out \ - lib$(BASEEXT).def perlmain.c \ - perl.exe so_locations \ - $(BASEEXT).exp - - $(RM_RF) \ - blib - - $(MV) $(FIRST_MAKEFILE) $(MAKEFILE_OLD) $(DEV_NULL) - - -# --- MakeMaker realclean_subdirs section: -realclean_subdirs : - $(NOECHO) $(NOOP) - - -# --- MakeMaker realclean section: -# Delete temporary files (via clean) and also delete dist files -realclean purge :: clean realclean_subdirs - - $(RM_F) \ - $(OBJECT) $(MAKEFILE_OLD) \ - $(FIRST_MAKEFILE) - - $(RM_RF) \ - $(DISTVNAME) - - -# --- MakeMaker metafile section: -metafile : create_distdir - $(NOECHO) $(ECHO) Generating META.yml - $(NOECHO) $(ECHO) '--- #YAML:1.0' > META_new.yml - $(NOECHO) $(ECHO) 'name: CRFPP' >> META_new.yml - $(NOECHO) $(ECHO) 'version: ' >> META_new.yml - $(NOECHO) $(ECHO) 'abstract: ~' >> META_new.yml - $(NOECHO) $(ECHO) 'author: []' >> META_new.yml - $(NOECHO) $(ECHO) 'license: unknown' >> META_new.yml - $(NOECHO) $(ECHO) 'distribution_type: module' >> META_new.yml - $(NOECHO) $(ECHO) 'configure_requires:' >> META_new.yml - $(NOECHO) $(ECHO) ' ExtUtils::MakeMaker: 0' >> META_new.yml - $(NOECHO) $(ECHO) 'build_requires:' >> META_new.yml - $(NOECHO) $(ECHO) ' ExtUtils::MakeMaker: 0' >> META_new.yml - $(NOECHO) $(ECHO) 'requires: {}' >> META_new.yml - $(NOECHO) $(ECHO) 'no_index:' >> META_new.yml - $(NOECHO) $(ECHO) ' directory:' >> META_new.yml - $(NOECHO) $(ECHO) ' - t' >> META_new.yml - $(NOECHO) $(ECHO) ' - inc' >> META_new.yml - $(NOECHO) $(ECHO) 'generated_by: ExtUtils::MakeMaker version 6.56' >> META_new.yml - $(NOECHO) $(ECHO) 'meta-spec:' >> META_new.yml - $(NOECHO) $(ECHO) ' url: http://module-build.sourceforge.net/META-spec-v1.4.html' >> META_new.yml - $(NOECHO) $(ECHO) ' version: 1.4' >> META_new.yml - -$(NOECHO) $(MV) META_new.yml $(DISTVNAME)/META.yml - - -# --- MakeMaker signature section: -signature : - cpansign -s - - -# --- MakeMaker dist_basics section: -distclean :: realclean distcheck - $(NOECHO) $(NOOP) - -distcheck : - $(PERLRUN) "-MExtUtils::Manifest=fullcheck" -e fullcheck - -skipcheck : - $(PERLRUN) "-MExtUtils::Manifest=skipcheck" -e skipcheck - -manifest : - $(PERLRUN) "-MExtUtils::Manifest=mkmanifest" -e mkmanifest - -veryclean : realclean - $(RM_F) *~ */*~ *.orig */*.orig *.bak */*.bak *.old */*.old - - - -# --- MakeMaker dist_core section: - -dist : $(DIST_DEFAULT) $(FIRST_MAKEFILE) - $(NOECHO) $(ABSPERLRUN) -l -e 'print '\''Warning: Makefile possibly out of date with $(VERSION_FROM)'\''' \ - -e ' if -e '\''$(VERSION_FROM)'\'' and -M '\''$(VERSION_FROM)'\'' < -M '\''$(FIRST_MAKEFILE)'\'';' -- - -tardist : $(DISTVNAME).tar$(SUFFIX) - $(NOECHO) $(NOOP) - -uutardist : $(DISTVNAME).tar$(SUFFIX) - uuencode $(DISTVNAME).tar$(SUFFIX) $(DISTVNAME).tar$(SUFFIX) > $(DISTVNAME).tar$(SUFFIX)_uu - -$(DISTVNAME).tar$(SUFFIX) : distdir - $(PREOP) - $(TO_UNIX) - $(TAR) $(TARFLAGS) $(DISTVNAME).tar $(DISTVNAME) - $(RM_RF) $(DISTVNAME) - $(COMPRESS) $(DISTVNAME).tar - $(POSTOP) - -zipdist : $(DISTVNAME).zip - $(NOECHO) $(NOOP) - -$(DISTVNAME).zip : distdir - $(PREOP) - $(ZIP) $(ZIPFLAGS) $(DISTVNAME).zip $(DISTVNAME) - $(RM_RF) $(DISTVNAME) - $(POSTOP) - -shdist : distdir - $(PREOP) - $(SHAR) $(DISTVNAME) > $(DISTVNAME).shar - $(RM_RF) $(DISTVNAME) - $(POSTOP) - - -# --- MakeMaker distdir section: -create_distdir : - $(RM_RF) $(DISTVNAME) - $(PERLRUN) "-MExtUtils::Manifest=manicopy,maniread" \ - -e "manicopy(maniread(),'$(DISTVNAME)', '$(DIST_CP)');" - -distdir : create_distdir distmeta - $(NOECHO) $(NOOP) - - - -# --- MakeMaker dist_test section: -disttest : distdir - cd $(DISTVNAME) && $(ABSPERLRUN) Makefile.PL - cd $(DISTVNAME) && $(MAKE) $(PASTHRU) - cd $(DISTVNAME) && $(MAKE) test $(PASTHRU) - - - -# --- MakeMaker dist_ci section: - -ci : - $(PERLRUN) "-MExtUtils::Manifest=maniread" \ - -e "@all = keys %{ maniread() };" \ - -e "print(qq{Executing $(CI) @all\n}); system(qq{$(CI) @all});" \ - -e "print(qq{Executing $(RCS_LABEL) ...\n}); system(qq{$(RCS_LABEL) @all});" - - -# --- MakeMaker distmeta section: -distmeta : create_distdir metafile - $(NOECHO) cd $(DISTVNAME) && $(ABSPERLRUN) -MExtUtils::Manifest=maniadd -e 'eval { maniadd({q{META.yml} => q{Module meta-data (added by MakeMaker)}}) } ' \ - -e ' or print "Could not add META.yml to MANIFEST: $${'\''@'\''}\n"' -- - - - -# --- MakeMaker distsignature section: -distsignature : create_distdir - $(NOECHO) cd $(DISTVNAME) && $(ABSPERLRUN) -MExtUtils::Manifest=maniadd -e 'eval { maniadd({q{SIGNATURE} => q{Public-key signature (added by MakeMaker)}}) } ' \ - -e ' or print "Could not add SIGNATURE to MANIFEST: $${'\''@'\''}\n"' -- - $(NOECHO) cd $(DISTVNAME) && $(TOUCH) SIGNATURE - cd $(DISTVNAME) && cpansign -s - - - -# --- MakeMaker install section: - -install :: pure_install doc_install - $(NOECHO) $(NOOP) - -install_perl :: pure_perl_install doc_perl_install - $(NOECHO) $(NOOP) - -install_site :: pure_site_install doc_site_install - $(NOECHO) $(NOOP) - -install_vendor :: pure_vendor_install doc_vendor_install - $(NOECHO) $(NOOP) - -pure_install :: pure_$(INSTALLDIRS)_install - $(NOECHO) $(NOOP) - -doc_install :: doc_$(INSTALLDIRS)_install - $(NOECHO) $(NOOP) - -pure__install : pure_site_install - $(NOECHO) $(ECHO) INSTALLDIRS not defined, defaulting to INSTALLDIRS=site - -doc__install : doc_site_install - $(NOECHO) $(ECHO) INSTALLDIRS not defined, defaulting to INSTALLDIRS=site - -pure_perl_install :: all - $(NOECHO) umask 022; $(MOD_INSTALL) \ - $(INST_LIB) $(DESTINSTALLPRIVLIB) \ - $(INST_ARCHLIB) $(DESTINSTALLARCHLIB) \ - $(INST_BIN) $(DESTINSTALLBIN) \ - $(INST_SCRIPT) $(DESTINSTALLSCRIPT) \ - $(INST_MAN1DIR) $(DESTINSTALLMAN1DIR) \ - $(INST_MAN3DIR) $(DESTINSTALLMAN3DIR) - $(NOECHO) $(WARN_IF_OLD_PACKLIST) \ - $(SITEARCHEXP)/auto/$(FULLEXT) - - -pure_site_install :: all - $(NOECHO) umask 02; $(MOD_INSTALL) \ - read $(SITEARCHEXP)/auto/$(FULLEXT)/.packlist \ - write $(DESTINSTALLSITEARCH)/auto/$(FULLEXT)/.packlist \ - $(INST_LIB) $(DESTINSTALLSITELIB) \ - $(INST_ARCHLIB) $(DESTINSTALLSITEARCH) \ - $(INST_BIN) $(DESTINSTALLSITEBIN) \ - $(INST_SCRIPT) $(DESTINSTALLSITESCRIPT) \ - $(INST_MAN1DIR) $(DESTINSTALLSITEMAN1DIR) \ - $(INST_MAN3DIR) $(DESTINSTALLSITEMAN3DIR) - $(NOECHO) $(WARN_IF_OLD_PACKLIST) \ - $(PERL_ARCHLIB)/auto/$(FULLEXT) - -pure_vendor_install :: all - $(NOECHO) umask 022; $(MOD_INSTALL) \ - $(INST_LIB) $(DESTINSTALLVENDORLIB) \ - $(INST_ARCHLIB) $(DESTINSTALLVENDORARCH) \ - $(INST_BIN) $(DESTINSTALLVENDORBIN) \ - $(INST_SCRIPT) $(DESTINSTALLVENDORSCRIPT) \ - $(INST_MAN1DIR) $(DESTINSTALLVENDORMAN1DIR) \ - $(INST_MAN3DIR) $(DESTINSTALLVENDORMAN3DIR) - -doc_perl_install :: all - -doc_site_install :: all - $(NOECHO) $(ECHO) Appending installation info to $(DESTINSTALLSITEARCH)/perllocal.pod - -$(NOECHO) umask 02; $(MKPATH) $(DESTINSTALLSITEARCH) - -$(NOECHO) umask 02; $(DOC_INSTALL) \ - "Module" "$(NAME)" \ - "installed into" "$(INSTALLSITELIB)" \ - LINKTYPE "$(LINKTYPE)" \ - VERSION "$(VERSION)" \ - EXE_FILES "$(EXE_FILES)" \ - >> $(DESTINSTALLSITEARCH)/perllocal.pod - -doc_vendor_install :: all - - -uninstall :: uninstall_from_$(INSTALLDIRS)dirs - $(NOECHO) $(NOOP) - -uninstall_from_perldirs :: - -uninstall_from_sitedirs :: - $(NOECHO) $(UNINSTALL) $(SITEARCHEXP)/auto/$(FULLEXT)/.packlist - -uninstall_from_vendordirs :: - - - -# --- MakeMaker force section: -# Phony target to force checking subdirectories. -FORCE : - $(NOECHO) $(NOOP) - - -# --- MakeMaker perldepend section: - -PERL_HDRS = \ - $(PERL_INC)/EXTERN.h \ - $(PERL_INC)/INTERN.h \ - $(PERL_INC)/XSUB.h \ - $(PERL_INC)/av.h \ - $(PERL_INC)/cc_runtime.h \ - $(PERL_INC)/config.h \ - $(PERL_INC)/cop.h \ - $(PERL_INC)/cv.h \ - $(PERL_INC)/dosish.h \ - $(PERL_INC)/embed.h \ - $(PERL_INC)/embedvar.h \ - $(PERL_INC)/fakethr.h \ - $(PERL_INC)/form.h \ - $(PERL_INC)/gv.h \ - $(PERL_INC)/handy.h \ - $(PERL_INC)/hv.h \ - $(PERL_INC)/intrpvar.h \ - $(PERL_INC)/iperlsys.h \ - $(PERL_INC)/keywords.h \ - $(PERL_INC)/mg.h \ - $(PERL_INC)/nostdio.h \ - $(PERL_INC)/op.h \ - $(PERL_INC)/opcode.h \ - $(PERL_INC)/patchlevel.h \ - $(PERL_INC)/perl.h \ - $(PERL_INC)/perlio.h \ - $(PERL_INC)/perlsdio.h \ - $(PERL_INC)/perlsfio.h \ - $(PERL_INC)/perlvars.h \ - $(PERL_INC)/perly.h \ - $(PERL_INC)/pp.h \ - $(PERL_INC)/pp_proto.h \ - $(PERL_INC)/proto.h \ - $(PERL_INC)/regcomp.h \ - $(PERL_INC)/regexp.h \ - $(PERL_INC)/regnodes.h \ - $(PERL_INC)/scope.h \ - $(PERL_INC)/sv.h \ - $(PERL_INC)/thread.h \ - $(PERL_INC)/unixish.h \ - $(PERL_INC)/util.h - -$(OBJECT) : $(PERL_HDRS) - - -# --- MakeMaker makefile section: - -$(OBJECT) : $(FIRST_MAKEFILE) - -# We take a very conservative approach here, but it's worth it. -# We move Makefile to Makefile.old here to avoid gnu make looping. -$(FIRST_MAKEFILE) : Makefile.PL $(CONFIGDEP) - $(NOECHO) $(ECHO) "Makefile out-of-date with respect to $?" - $(NOECHO) $(ECHO) "Cleaning current config before rebuilding Makefile..." - -$(NOECHO) $(RM_F) $(MAKEFILE_OLD) - -$(NOECHO) $(MV) $(FIRST_MAKEFILE) $(MAKEFILE_OLD) - - $(MAKE) $(USEMAKEFILE) $(MAKEFILE_OLD) clean $(DEV_NULL) - $(PERLRUN) Makefile.PL - $(NOECHO) $(ECHO) "==> Your Makefile has been rebuilt. <==" - $(NOECHO) $(ECHO) "==> Please rerun the $(MAKE) command. <==" - $(FALSE) - - - -# --- MakeMaker staticmake section: - -# --- MakeMaker makeaperl section --- -MAP_TARGET = perl -FULLPERL = /usr/bin/perl - -$(MAP_TARGET) :: static $(MAKE_APERL_FILE) - $(MAKE) $(USEMAKEFILE) $(MAKE_APERL_FILE) $@ - -$(MAKE_APERL_FILE) : $(FIRST_MAKEFILE) pm_to_blib - $(NOECHO) $(ECHO) Writing \"$(MAKE_APERL_FILE)\" for this $(MAP_TARGET) - $(NOECHO) $(PERLRUNINST) \ - Makefile.PL DIR= \ - MAKEFILE=$(MAKE_APERL_FILE) LINKTYPE=static \ - MAKEAPERL=1 NORECURS=1 CCCDLFLAGS= - - -# --- MakeMaker test section: - -TEST_VERBOSE=0 -TEST_TYPE=test_$(LINKTYPE) -TEST_FILE = test.pl -TEST_FILES = -TESTDB_SW = -d - -testdb :: testdb_$(LINKTYPE) - -test :: $(TEST_TYPE) subdirs-test - -subdirs-test :: - $(NOECHO) $(NOOP) - - -test_dynamic :: pure_all - PERL_DL_NONLAZY=1 $(FULLPERLRUN) "-I$(INST_LIB)" "-I$(INST_ARCHLIB)" $(TEST_FILE) - -testdb_dynamic :: pure_all - PERL_DL_NONLAZY=1 $(FULLPERLRUN) $(TESTDB_SW) "-I$(INST_LIB)" "-I$(INST_ARCHLIB)" $(TEST_FILE) - -test_ : test_dynamic - -test_static :: pure_all $(MAP_TARGET) - PERL_DL_NONLAZY=1 ./$(MAP_TARGET) "-I$(INST_LIB)" "-I$(INST_ARCHLIB)" $(TEST_FILE) - -testdb_static :: pure_all $(MAP_TARGET) - PERL_DL_NONLAZY=1 ./$(MAP_TARGET) $(TESTDB_SW) "-I$(INST_LIB)" "-I$(INST_ARCHLIB)" $(TEST_FILE) - - - -# --- MakeMaker ppd section: -# Creates a PPD (Perl Package Description) for a binary distribution. -ppd : - $(NOECHO) $(ECHO) '' > $(DISTNAME).ppd - $(NOECHO) $(ECHO) ' ' >> $(DISTNAME).ppd - $(NOECHO) $(ECHO) ' ' >> $(DISTNAME).ppd - $(NOECHO) $(ECHO) ' ' >> $(DISTNAME).ppd - $(NOECHO) $(ECHO) ' ' >> $(DISTNAME).ppd - $(NOECHO) $(ECHO) ' ' >> $(DISTNAME).ppd - $(NOECHO) $(ECHO) ' ' >> $(DISTNAME).ppd - $(NOECHO) $(ECHO) '' >> $(DISTNAME).ppd - - -# --- MakeMaker pm_to_blib section: - -pm_to_blib : $(FIRST_MAKEFILE) $(TO_INST_PM) - $(NOECHO) $(ABSPERLRUN) -MExtUtils::Install -e 'pm_to_blib({@ARGV}, '\''$(INST_LIB)/auto'\'', q[$(PM_FILTER)], '\''$(PERM_DIR)'\'')' -- \ - CRFPP.pm $(INST_LIB)/CRFPP.pm - $(NOECHO) $(TOUCH) pm_to_blib - - -# --- MakeMaker selfdocument section: - - -# --- MakeMaker postamble section: - - -# End. +# This Makefile is for the CRFPP extension to perl. +# +# It was generated automatically by MakeMaker version +# 6.56 (Revision: 65600) from the contents of +# Makefile.PL. Don't edit this file, edit Makefile.PL instead. +# +# ANY CHANGES MADE HERE WILL BE LOST! +# +# MakeMaker ARGV: () +# + +# MakeMaker Parameters: + +# BUILD_REQUIRES => { } +# CC => q[c++] +# INC => q[] +# LD => q[c++] +# LIBS => q[-lpthread -lcrfpp] +# NAME => q[CRFPP] +# OBJECT => q[CRFPP_wrap.o] +# PREREQ_PM => { } + +# --- MakeMaker post_initialize section: + + +# --- MakeMaker const_config section: + +# These definitions are from config.sh (via /usr/lib/perl/5.12/Config.pm). +# They may have been overridden via Makefile.PL or on the command line. +AR = ar +CC = c++ +CCCDLFLAGS = -fPIC +CCDLFLAGS = -Wl,-E +DLEXT = so +DLSRC = dl_dlopen.xs +EXE_EXT = +FULL_AR = /usr/bin/ar +LD = c++ +LDDLFLAGS = -shared -O2 -g -L/usr/local/lib -fstack-protector +LDFLAGS = -fstack-protector -L/usr/local/lib +LIBC = +LIB_EXT = .a +OBJ_EXT = .o +OSNAME = linux +OSVERS = 2.6.24-28-server +RANLIB = : +SITELIBEXP = /usr/local/share/perl/5.12.4 +SITEARCHEXP = /usr/local/lib/perl/5.12.4 +SO = so +VENDORARCHEXP = /usr/lib/perl5 +VENDORLIBEXP = /usr/share/perl5 + + +# --- MakeMaker constants section: +AR_STATIC_ARGS = cr +DIRFILESEP = / +DFSEP = $(DIRFILESEP) +NAME = CRFPP +NAME_SYM = CRFPP +VERSION = +VERSION_MACRO = VERSION +VERSION_SYM = +DEFINE_VERSION = -D$(VERSION_MACRO)=\"$(VERSION)\" +XS_VERSION = +XS_VERSION_MACRO = XS_VERSION +XS_DEFINE_VERSION = -D$(XS_VERSION_MACRO)=\"$(XS_VERSION)\" +INST_ARCHLIB = blib/arch +INST_SCRIPT = blib/script +INST_BIN = blib/bin +INST_LIB = blib/lib +INST_MAN1DIR = blib/man1 +INST_MAN3DIR = blib/man3 +MAN1EXT = 1p +MAN3EXT = 3pm +INSTALLDIRS = site +DESTDIR = +PREFIX = /usr +PERLPREFIX = $(PREFIX) +SITEPREFIX = $(PREFIX)/local +VENDORPREFIX = $(PREFIX) +INSTALLPRIVLIB = $(PERLPREFIX)/share/perl/5.12 +DESTINSTALLPRIVLIB = $(DESTDIR)$(INSTALLPRIVLIB) +INSTALLSITELIB = $(SITEPREFIX)/share/perl/5.12.4 +DESTINSTALLSITELIB = $(DESTDIR)$(INSTALLSITELIB) +INSTALLVENDORLIB = $(VENDORPREFIX)/share/perl5 +DESTINSTALLVENDORLIB = $(DESTDIR)$(INSTALLVENDORLIB) +INSTALLARCHLIB = $(PERLPREFIX)/lib/perl/5.12 +DESTINSTALLARCHLIB = $(DESTDIR)$(INSTALLARCHLIB) +INSTALLSITEARCH = $(SITEPREFIX)/lib/perl/5.12.4 +DESTINSTALLSITEARCH = $(DESTDIR)$(INSTALLSITEARCH) +INSTALLVENDORARCH = $(VENDORPREFIX)/lib/perl5 +DESTINSTALLVENDORARCH = $(DESTDIR)$(INSTALLVENDORARCH) +INSTALLBIN = $(PERLPREFIX)/bin +DESTINSTALLBIN = $(DESTDIR)$(INSTALLBIN) +INSTALLSITEBIN = $(SITEPREFIX)/bin +DESTINSTALLSITEBIN = $(DESTDIR)$(INSTALLSITEBIN) +INSTALLVENDORBIN = $(VENDORPREFIX)/bin +DESTINSTALLVENDORBIN = $(DESTDIR)$(INSTALLVENDORBIN) +INSTALLSCRIPT = $(PERLPREFIX)/bin +DESTINSTALLSCRIPT = $(DESTDIR)$(INSTALLSCRIPT) +INSTALLSITESCRIPT = $(SITEPREFIX)/bin +DESTINSTALLSITESCRIPT = $(DESTDIR)$(INSTALLSITESCRIPT) +INSTALLVENDORSCRIPT = $(VENDORPREFIX)/bin +DESTINSTALLVENDORSCRIPT = $(DESTDIR)$(INSTALLVENDORSCRIPT) +INSTALLMAN1DIR = $(PERLPREFIX)/share/man/man1 +DESTINSTALLMAN1DIR = $(DESTDIR)$(INSTALLMAN1DIR) +INSTALLSITEMAN1DIR = $(SITEPREFIX)/man/man1 +DESTINSTALLSITEMAN1DIR = $(DESTDIR)$(INSTALLSITEMAN1DIR) +INSTALLVENDORMAN1DIR = $(VENDORPREFIX)/share/man/man1 +DESTINSTALLVENDORMAN1DIR = $(DESTDIR)$(INSTALLVENDORMAN1DIR) +INSTALLMAN3DIR = $(PERLPREFIX)/share/man/man3 +DESTINSTALLMAN3DIR = $(DESTDIR)$(INSTALLMAN3DIR) +INSTALLSITEMAN3DIR = $(SITEPREFIX)/man/man3 +DESTINSTALLSITEMAN3DIR = $(DESTDIR)$(INSTALLSITEMAN3DIR) +INSTALLVENDORMAN3DIR = $(VENDORPREFIX)/share/man/man3 +DESTINSTALLVENDORMAN3DIR = $(DESTDIR)$(INSTALLVENDORMAN3DIR) +PERL_LIB = /usr/share/perl/5.12 +PERL_ARCHLIB = /usr/lib/perl/5.12 +LIBPERL_A = libperl.a +FIRST_MAKEFILE = Makefile +MAKEFILE_OLD = Makefile.old +MAKE_APERL_FILE = Makefile.aperl +PERLMAINCC = $(CC) +PERL_INC = /usr/lib/perl/5.12/CORE +PERL = /usr/bin/perl +FULLPERL = /usr/bin/perl +ABSPERL = $(PERL) +PERLRUN = $(PERL) +FULLPERLRUN = $(FULLPERL) +ABSPERLRUN = $(ABSPERL) +PERLRUNINST = $(PERLRUN) "-I$(INST_ARCHLIB)" "-I$(INST_LIB)" +FULLPERLRUNINST = $(FULLPERLRUN) "-I$(INST_ARCHLIB)" "-I$(INST_LIB)" +ABSPERLRUNINST = $(ABSPERLRUN) "-I$(INST_ARCHLIB)" "-I$(INST_LIB)" +PERL_CORE = 0 +PERM_DIR = 755 +PERM_RW = 644 +PERM_RWX = 755 + +MAKEMAKER = /usr/share/perl/5.12/ExtUtils/MakeMaker.pm +MM_VERSION = 6.56 +MM_REVISION = 65600 + +# FULLEXT = Pathname for extension directory (eg Foo/Bar/Oracle). +# BASEEXT = Basename part of FULLEXT. May be just equal FULLEXT. (eg Oracle) +# PARENT_NAME = NAME without BASEEXT and no trailing :: (eg Foo::Bar) +# DLBASE = Basename part of dynamic library. May be just equal BASEEXT. +MAKE = make +FULLEXT = CRFPP +BASEEXT = CRFPP +PARENT_NAME = +DLBASE = $(BASEEXT) +VERSION_FROM = +INC = +OBJECT = CRFPP_wrap$(OBJ_EXT) +LDFROM = $(OBJECT) +LINKTYPE = dynamic +BOOTDEP = + +# Handy lists of source code files: +XS_FILES = +C_FILES = CRFPP_wrap.cxx +O_FILES = CRFPP_wrap.o +H_FILES = +MAN1PODS = +MAN3PODS = + +# Where is the Config information that we are using/depend on +CONFIGDEP = $(PERL_ARCHLIB)$(DFSEP)Config.pm $(PERL_INC)$(DFSEP)config.h + +# Where to build things +INST_LIBDIR = $(INST_LIB) +INST_ARCHLIBDIR = $(INST_ARCHLIB) + +INST_AUTODIR = $(INST_LIB)/auto/$(FULLEXT) +INST_ARCHAUTODIR = $(INST_ARCHLIB)/auto/$(FULLEXT) + +INST_STATIC = $(INST_ARCHAUTODIR)/$(BASEEXT)$(LIB_EXT) +INST_DYNAMIC = $(INST_ARCHAUTODIR)/$(DLBASE).$(DLEXT) +INST_BOOT = $(INST_ARCHAUTODIR)/$(BASEEXT).bs + +# Extra linker info +EXPORT_LIST = +PERL_ARCHIVE = +PERL_ARCHIVE_AFTER = + + +TO_INST_PM = CRFPP.pm + +PM_TO_BLIB = CRFPP.pm \ + $(INST_LIB)/CRFPP.pm + + +# --- MakeMaker platform_constants section: +MM_Unix_VERSION = 6.56 +PERL_MALLOC_DEF = -DPERL_EXTMALLOC_DEF -Dmalloc=Perl_malloc -Dfree=Perl_mfree -Drealloc=Perl_realloc -Dcalloc=Perl_calloc + + +# --- MakeMaker tool_autosplit section: +# Usage: $(AUTOSPLITFILE) FileToSplit AutoDirToSplitInto +AUTOSPLITFILE = $(ABSPERLRUN) -e 'use AutoSplit; autosplit($$ARGV[0], $$ARGV[1], 0, 1, 1)' -- + + + +# --- MakeMaker tool_xsubpp section: + +XSUBPPDIR = /usr/share/perl/5.12/ExtUtils +XSUBPP = $(XSUBPPDIR)$(DFSEP)xsubpp +XSUBPPRUN = $(PERLRUN) $(XSUBPP) +XSPROTOARG = +XSUBPPDEPS = /usr/share/perl/5.12/ExtUtils/typemap $(XSUBPP) +XSUBPPARGS = -typemap /usr/share/perl/5.12/ExtUtils/typemap +XSUBPP_EXTRA_ARGS = + + +# --- MakeMaker tools_other section: +SHELL = /bin/sh +CHMOD = chmod +CP = cp +MV = mv +NOOP = $(TRUE) +NOECHO = @ +RM_F = rm -f +RM_RF = rm -rf +TEST_F = test -f +TOUCH = touch +UMASK_NULL = umask 0 +DEV_NULL = > /dev/null 2>&1 +MKPATH = $(ABSPERLRUN) -MExtUtils::Command -e 'mkpath' -- +EQUALIZE_TIMESTAMP = $(ABSPERLRUN) -MExtUtils::Command -e 'eqtime' -- +FALSE = false +TRUE = true +ECHO = echo +ECHO_N = echo -n +UNINST = 0 +VERBINST = 0 +MOD_INSTALL = $(ABSPERLRUN) -MExtUtils::Install -e 'install([ from_to => {@ARGV}, verbose => '\''$(VERBINST)'\'', uninstall_shadows => '\''$(UNINST)'\'', dir_mode => '\''$(PERM_DIR)'\'' ]);' -- +DOC_INSTALL = $(ABSPERLRUN) -MExtUtils::Command::MM -e 'perllocal_install' -- +UNINSTALL = $(ABSPERLRUN) -MExtUtils::Command::MM -e 'uninstall' -- +WARN_IF_OLD_PACKLIST = $(ABSPERLRUN) -MExtUtils::Command::MM -e 'warn_if_old_packlist' -- +MACROSTART = +MACROEND = +USEMAKEFILE = -f +FIXIN = $(ABSPERLRUN) -MExtUtils::MY -e 'MY->fixin(shift)' -- + + +# --- MakeMaker makemakerdflt section: +makemakerdflt : all + $(NOECHO) $(NOOP) + + +# --- MakeMaker dist section: +TAR = tar +TARFLAGS = cvf +ZIP = zip +ZIPFLAGS = -r +COMPRESS = gzip --best +SUFFIX = .gz +SHAR = shar +PREOP = $(NOECHO) $(NOOP) +POSTOP = $(NOECHO) $(NOOP) +TO_UNIX = $(NOECHO) $(NOOP) +CI = ci -u +RCS_LABEL = rcs -Nv$(VERSION_SYM): -q +DIST_CP = best +DIST_DEFAULT = tardist +DISTNAME = CRFPP +DISTVNAME = CRFPP- + + +# --- MakeMaker macro section: + + +# --- MakeMaker depend section: + + +# --- MakeMaker cflags section: + +CCFLAGS = -D_REENTRANT -D_GNU_SOURCE -DDEBIAN -fno-strict-aliasing -pipe -fstack-protector -I/usr/local/include -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 +OPTIMIZE = -O2 -g +PERLTYPE = +MPOLLUTE = + + +# --- MakeMaker const_loadlibs section: + +# CRFPP might depend on some other libraries: +# See ExtUtils::Liblist for details +# +EXTRALIBS = -lcrfpp +LDLOADLIBS = -lpthread -lcrfpp +BSLOADLIBS = + + +# --- MakeMaker const_cccmd section: +CCCMD = $(CC) -c $(PASTHRU_INC) $(INC) \ + $(CCFLAGS) $(OPTIMIZE) \ + $(PERLTYPE) $(MPOLLUTE) $(DEFINE_VERSION) \ + $(XS_DEFINE_VERSION) + +# --- MakeMaker post_constants section: + + +# --- MakeMaker pasthru section: + +PASTHRU = LIBPERL_A="$(LIBPERL_A)"\ + LINKTYPE="$(LINKTYPE)"\ + OPTIMIZE="$(OPTIMIZE)"\ + PREFIX="$(PREFIX)"\ + PASTHRU_INC="$(PASTHRU_INC)" + + +# --- MakeMaker special_targets section: +.SUFFIXES : .xs .c .C .cpp .i .s .cxx .cc $(OBJ_EXT) + +.PHONY: all config static dynamic test linkext manifest blibdirs clean realclean disttest distdir + + + +# --- MakeMaker c_o section: + +.c.i: + cc -E -c $(PASTHRU_INC) $(INC) \ + $(CCFLAGS) $(OPTIMIZE) \ + $(PERLTYPE) $(MPOLLUTE) $(DEFINE_VERSION) \ + $(XS_DEFINE_VERSION) $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) $*.c > $*.i + +.c.s: + $(CCCMD) -S $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) $*.c + +.c$(OBJ_EXT): + $(CCCMD) $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) $*.c + +.cpp$(OBJ_EXT): + $(CCCMD) $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) $*.cpp + +.cxx$(OBJ_EXT): + $(CCCMD) $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) $*.cxx + +.cc$(OBJ_EXT): + $(CCCMD) $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) $*.cc + +.C$(OBJ_EXT): + $(CCCMD) $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) $*.C + + +# --- MakeMaker xs_c section: + +.xs.c: + $(XSUBPPRUN) $(XSPROTOARG) $(XSUBPPARGS) $(XSUBPP_EXTRA_ARGS) $*.xs > $*.xsc && $(MV) $*.xsc $*.c + + +# --- MakeMaker xs_o section: + +.xs$(OBJ_EXT): + $(XSUBPPRUN) $(XSPROTOARG) $(XSUBPPARGS) $*.xs > $*.xsc && $(MV) $*.xsc $*.c + $(CCCMD) $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) $*.c + + +# --- MakeMaker top_targets section: +all :: pure_all manifypods + $(NOECHO) $(NOOP) + + +pure_all :: config pm_to_blib subdirs linkext + $(NOECHO) $(NOOP) + +subdirs :: $(MYEXTLIB) + $(NOECHO) $(NOOP) + +config :: $(FIRST_MAKEFILE) blibdirs + $(NOECHO) $(NOOP) + +help : + perldoc ExtUtils::MakeMaker + + +# --- MakeMaker blibdirs section: +blibdirs : $(INST_LIBDIR)$(DFSEP).exists $(INST_ARCHLIB)$(DFSEP).exists $(INST_AUTODIR)$(DFSEP).exists $(INST_ARCHAUTODIR)$(DFSEP).exists $(INST_BIN)$(DFSEP).exists $(INST_SCRIPT)$(DFSEP).exists $(INST_MAN1DIR)$(DFSEP).exists $(INST_MAN3DIR)$(DFSEP).exists + $(NOECHO) $(NOOP) + +# Backwards compat with 6.18 through 6.25 +blibdirs.ts : blibdirs + $(NOECHO) $(NOOP) + +$(INST_LIBDIR)$(DFSEP).exists :: Makefile.PL + $(NOECHO) $(MKPATH) $(INST_LIBDIR) + $(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_LIBDIR) + $(NOECHO) $(TOUCH) $(INST_LIBDIR)$(DFSEP).exists + +$(INST_ARCHLIB)$(DFSEP).exists :: Makefile.PL + $(NOECHO) $(MKPATH) $(INST_ARCHLIB) + $(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_ARCHLIB) + $(NOECHO) $(TOUCH) $(INST_ARCHLIB)$(DFSEP).exists + +$(INST_AUTODIR)$(DFSEP).exists :: Makefile.PL + $(NOECHO) $(MKPATH) $(INST_AUTODIR) + $(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_AUTODIR) + $(NOECHO) $(TOUCH) $(INST_AUTODIR)$(DFSEP).exists + +$(INST_ARCHAUTODIR)$(DFSEP).exists :: Makefile.PL + $(NOECHO) $(MKPATH) $(INST_ARCHAUTODIR) + $(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_ARCHAUTODIR) + $(NOECHO) $(TOUCH) $(INST_ARCHAUTODIR)$(DFSEP).exists + +$(INST_BIN)$(DFSEP).exists :: Makefile.PL + $(NOECHO) $(MKPATH) $(INST_BIN) + $(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_BIN) + $(NOECHO) $(TOUCH) $(INST_BIN)$(DFSEP).exists + +$(INST_SCRIPT)$(DFSEP).exists :: Makefile.PL + $(NOECHO) $(MKPATH) $(INST_SCRIPT) + $(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_SCRIPT) + $(NOECHO) $(TOUCH) $(INST_SCRIPT)$(DFSEP).exists + +$(INST_MAN1DIR)$(DFSEP).exists :: Makefile.PL + $(NOECHO) $(MKPATH) $(INST_MAN1DIR) + $(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_MAN1DIR) + $(NOECHO) $(TOUCH) $(INST_MAN1DIR)$(DFSEP).exists + +$(INST_MAN3DIR)$(DFSEP).exists :: Makefile.PL + $(NOECHO) $(MKPATH) $(INST_MAN3DIR) + $(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_MAN3DIR) + $(NOECHO) $(TOUCH) $(INST_MAN3DIR)$(DFSEP).exists + + + +# --- MakeMaker linkext section: + +linkext :: $(LINKTYPE) + $(NOECHO) $(NOOP) + + +# --- MakeMaker dlsyms section: + + +# --- MakeMaker dynamic section: + +dynamic :: $(FIRST_MAKEFILE) $(INST_DYNAMIC) $(INST_BOOT) + $(NOECHO) $(NOOP) + + +# --- MakeMaker dynamic_bs section: +BOOTSTRAP = $(BASEEXT).bs + +# As Mkbootstrap might not write a file (if none is required) +# we use touch to prevent make continually trying to remake it. +# The DynaLoader only reads a non-empty file. +$(BOOTSTRAP) : $(FIRST_MAKEFILE) $(BOOTDEP) $(INST_ARCHAUTODIR)$(DFSEP).exists + $(NOECHO) $(ECHO) "Running Mkbootstrap for $(NAME) ($(BSLOADLIBS))" + $(NOECHO) $(PERLRUN) \ + "-MExtUtils::Mkbootstrap" \ + -e "Mkbootstrap('$(BASEEXT)','$(BSLOADLIBS)');" + $(NOECHO) $(TOUCH) $@ + $(CHMOD) $(PERM_RW) $@ + +$(INST_BOOT) : $(BOOTSTRAP) $(INST_ARCHAUTODIR)$(DFSEP).exists + $(NOECHO) $(RM_RF) $@ + - $(CP) $(BOOTSTRAP) $@ + $(CHMOD) $(PERM_RW) $@ + + +# --- MakeMaker dynamic_lib section: + +# This section creates the dynamically loadable $(INST_DYNAMIC) +# from $(OBJECT) and possibly $(MYEXTLIB). +ARMAYBE = : +OTHERLDFLAGS = +INST_DYNAMIC_DEP = +INST_DYNAMIC_FIX = + +$(INST_DYNAMIC): $(OBJECT) $(MYEXTLIB) $(BOOTSTRAP) $(INST_ARCHAUTODIR)$(DFSEP).exists $(EXPORT_LIST) $(PERL_ARCHIVE) $(PERL_ARCHIVE_AFTER) $(INST_DYNAMIC_DEP) + $(RM_F) $@ + $(LD) $(LDDLFLAGS) $(LDFROM) $(OTHERLDFLAGS) -o $@ $(MYEXTLIB) \ + $(PERL_ARCHIVE) $(LDLOADLIBS) $(PERL_ARCHIVE_AFTER) $(EXPORT_LIST) \ + $(INST_DYNAMIC_FIX) + $(CHMOD) $(PERM_RWX) $@ + + +# --- MakeMaker static section: + +## $(INST_PM) has been moved to the all: target. +## It remains here for awhile to allow for old usage: "make static" +static :: $(FIRST_MAKEFILE) $(INST_STATIC) + $(NOECHO) $(NOOP) + + +# --- MakeMaker static_lib section: + +$(INST_STATIC) : $(OBJECT) $(MYEXTLIB) $(INST_ARCHAUTODIR)$(DFSEP).exists + $(RM_RF) $@ + $(FULL_AR) $(AR_STATIC_ARGS) $@ $(OBJECT) && $(RANLIB) $@ + $(CHMOD) $(PERM_RWX) $@ + $(NOECHO) $(ECHO) "$(EXTRALIBS)" > $(INST_ARCHAUTODIR)/extralibs.ld + + +# --- MakeMaker manifypods section: + +POD2MAN_EXE = $(PERLRUN) "-MExtUtils::Command::MM" -e pod2man "--" +POD2MAN = $(POD2MAN_EXE) + + +manifypods : pure_all + $(NOECHO) $(NOOP) + + + + +# --- MakeMaker processPL section: + + +# --- MakeMaker installbin section: + + +# --- MakeMaker subdirs section: + +# none + +# --- MakeMaker clean_subdirs section: +clean_subdirs : + $(NOECHO) $(NOOP) + + +# --- MakeMaker clean section: + +# Delete temporary files but do not touch installed files. We don't delete +# the Makefile here so a later make realclean still has a makefile to use. + +clean :: clean_subdirs + - $(RM_F) \ + *$(LIB_EXT) core \ + core.[0-9] $(INST_ARCHAUTODIR)/extralibs.all \ + core.[0-9][0-9] $(BASEEXT).bso \ + pm_to_blib.ts core.[0-9][0-9][0-9][0-9] \ + $(BASEEXT).x $(BOOTSTRAP) \ + perl$(EXE_EXT) tmon.out \ + *$(OBJ_EXT) pm_to_blib \ + $(INST_ARCHAUTODIR)/extralibs.ld blibdirs.ts \ + core.[0-9][0-9][0-9][0-9][0-9] *perl.core \ + core.*perl.*.? $(MAKE_APERL_FILE) \ + perl $(BASEEXT).def \ + core.[0-9][0-9][0-9] mon.out \ + lib$(BASEEXT).def perlmain.c \ + perl.exe so_locations \ + $(BASEEXT).exp + - $(RM_RF) \ + blib + - $(MV) $(FIRST_MAKEFILE) $(MAKEFILE_OLD) $(DEV_NULL) + + +# --- MakeMaker realclean_subdirs section: +realclean_subdirs : + $(NOECHO) $(NOOP) + + +# --- MakeMaker realclean section: +# Delete temporary files (via clean) and also delete dist files +realclean purge :: clean realclean_subdirs + - $(RM_F) \ + $(OBJECT) $(MAKEFILE_OLD) \ + $(FIRST_MAKEFILE) + - $(RM_RF) \ + $(DISTVNAME) + + +# --- MakeMaker metafile section: +metafile : create_distdir + $(NOECHO) $(ECHO) Generating META.yml + $(NOECHO) $(ECHO) '--- #YAML:1.0' > META_new.yml + $(NOECHO) $(ECHO) 'name: CRFPP' >> META_new.yml + $(NOECHO) $(ECHO) 'version: ' >> META_new.yml + $(NOECHO) $(ECHO) 'abstract: ~' >> META_new.yml + $(NOECHO) $(ECHO) 'author: []' >> META_new.yml + $(NOECHO) $(ECHO) 'license: unknown' >> META_new.yml + $(NOECHO) $(ECHO) 'distribution_type: module' >> META_new.yml + $(NOECHO) $(ECHO) 'configure_requires:' >> META_new.yml + $(NOECHO) $(ECHO) ' ExtUtils::MakeMaker: 0' >> META_new.yml + $(NOECHO) $(ECHO) 'build_requires:' >> META_new.yml + $(NOECHO) $(ECHO) ' ExtUtils::MakeMaker: 0' >> META_new.yml + $(NOECHO) $(ECHO) 'requires: {}' >> META_new.yml + $(NOECHO) $(ECHO) 'no_index:' >> META_new.yml + $(NOECHO) $(ECHO) ' directory:' >> META_new.yml + $(NOECHO) $(ECHO) ' - t' >> META_new.yml + $(NOECHO) $(ECHO) ' - inc' >> META_new.yml + $(NOECHO) $(ECHO) 'generated_by: ExtUtils::MakeMaker version 6.56' >> META_new.yml + $(NOECHO) $(ECHO) 'meta-spec:' >> META_new.yml + $(NOECHO) $(ECHO) ' url: http://module-build.sourceforge.net/META-spec-v1.4.html' >> META_new.yml + $(NOECHO) $(ECHO) ' version: 1.4' >> META_new.yml + -$(NOECHO) $(MV) META_new.yml $(DISTVNAME)/META.yml + + +# --- MakeMaker signature section: +signature : + cpansign -s + + +# --- MakeMaker dist_basics section: +distclean :: realclean distcheck + $(NOECHO) $(NOOP) + +distcheck : + $(PERLRUN) "-MExtUtils::Manifest=fullcheck" -e fullcheck + +skipcheck : + $(PERLRUN) "-MExtUtils::Manifest=skipcheck" -e skipcheck + +manifest : + $(PERLRUN) "-MExtUtils::Manifest=mkmanifest" -e mkmanifest + +veryclean : realclean + $(RM_F) *~ */*~ *.orig */*.orig *.bak */*.bak *.old */*.old + + + +# --- MakeMaker dist_core section: + +dist : $(DIST_DEFAULT) $(FIRST_MAKEFILE) + $(NOECHO) $(ABSPERLRUN) -l -e 'print '\''Warning: Makefile possibly out of date with $(VERSION_FROM)'\''' \ + -e ' if -e '\''$(VERSION_FROM)'\'' and -M '\''$(VERSION_FROM)'\'' < -M '\''$(FIRST_MAKEFILE)'\'';' -- + +tardist : $(DISTVNAME).tar$(SUFFIX) + $(NOECHO) $(NOOP) + +uutardist : $(DISTVNAME).tar$(SUFFIX) + uuencode $(DISTVNAME).tar$(SUFFIX) $(DISTVNAME).tar$(SUFFIX) > $(DISTVNAME).tar$(SUFFIX)_uu + +$(DISTVNAME).tar$(SUFFIX) : distdir + $(PREOP) + $(TO_UNIX) + $(TAR) $(TARFLAGS) $(DISTVNAME).tar $(DISTVNAME) + $(RM_RF) $(DISTVNAME) + $(COMPRESS) $(DISTVNAME).tar + $(POSTOP) + +zipdist : $(DISTVNAME).zip + $(NOECHO) $(NOOP) + +$(DISTVNAME).zip : distdir + $(PREOP) + $(ZIP) $(ZIPFLAGS) $(DISTVNAME).zip $(DISTVNAME) + $(RM_RF) $(DISTVNAME) + $(POSTOP) + +shdist : distdir + $(PREOP) + $(SHAR) $(DISTVNAME) > $(DISTVNAME).shar + $(RM_RF) $(DISTVNAME) + $(POSTOP) + + +# --- MakeMaker distdir section: +create_distdir : + $(RM_RF) $(DISTVNAME) + $(PERLRUN) "-MExtUtils::Manifest=manicopy,maniread" \ + -e "manicopy(maniread(),'$(DISTVNAME)', '$(DIST_CP)');" + +distdir : create_distdir distmeta + $(NOECHO) $(NOOP) + + + +# --- MakeMaker dist_test section: +disttest : distdir + cd $(DISTVNAME) && $(ABSPERLRUN) Makefile.PL + cd $(DISTVNAME) && $(MAKE) $(PASTHRU) + cd $(DISTVNAME) && $(MAKE) test $(PASTHRU) + + + +# --- MakeMaker dist_ci section: + +ci : + $(PERLRUN) "-MExtUtils::Manifest=maniread" \ + -e "@all = keys %{ maniread() };" \ + -e "print(qq{Executing $(CI) @all\n}); system(qq{$(CI) @all});" \ + -e "print(qq{Executing $(RCS_LABEL) ...\n}); system(qq{$(RCS_LABEL) @all});" + + +# --- MakeMaker distmeta section: +distmeta : create_distdir metafile + $(NOECHO) cd $(DISTVNAME) && $(ABSPERLRUN) -MExtUtils::Manifest=maniadd -e 'eval { maniadd({q{META.yml} => q{Module meta-data (added by MakeMaker)}}) } ' \ + -e ' or print "Could not add META.yml to MANIFEST: $${'\''@'\''}\n"' -- + + + +# --- MakeMaker distsignature section: +distsignature : create_distdir + $(NOECHO) cd $(DISTVNAME) && $(ABSPERLRUN) -MExtUtils::Manifest=maniadd -e 'eval { maniadd({q{SIGNATURE} => q{Public-key signature (added by MakeMaker)}}) } ' \ + -e ' or print "Could not add SIGNATURE to MANIFEST: $${'\''@'\''}\n"' -- + $(NOECHO) cd $(DISTVNAME) && $(TOUCH) SIGNATURE + cd $(DISTVNAME) && cpansign -s + + + +# --- MakeMaker install section: + +install :: pure_install doc_install + $(NOECHO) $(NOOP) + +install_perl :: pure_perl_install doc_perl_install + $(NOECHO) $(NOOP) + +install_site :: pure_site_install doc_site_install + $(NOECHO) $(NOOP) + +install_vendor :: pure_vendor_install doc_vendor_install + $(NOECHO) $(NOOP) + +pure_install :: pure_$(INSTALLDIRS)_install + $(NOECHO) $(NOOP) + +doc_install :: doc_$(INSTALLDIRS)_install + $(NOECHO) $(NOOP) + +pure__install : pure_site_install + $(NOECHO) $(ECHO) INSTALLDIRS not defined, defaulting to INSTALLDIRS=site + +doc__install : doc_site_install + $(NOECHO) $(ECHO) INSTALLDIRS not defined, defaulting to INSTALLDIRS=site + +pure_perl_install :: all + $(NOECHO) umask 022; $(MOD_INSTALL) \ + $(INST_LIB) $(DESTINSTALLPRIVLIB) \ + $(INST_ARCHLIB) $(DESTINSTALLARCHLIB) \ + $(INST_BIN) $(DESTINSTALLBIN) \ + $(INST_SCRIPT) $(DESTINSTALLSCRIPT) \ + $(INST_MAN1DIR) $(DESTINSTALLMAN1DIR) \ + $(INST_MAN3DIR) $(DESTINSTALLMAN3DIR) + $(NOECHO) $(WARN_IF_OLD_PACKLIST) \ + $(SITEARCHEXP)/auto/$(FULLEXT) + + +pure_site_install :: all + $(NOECHO) umask 02; $(MOD_INSTALL) \ + read $(SITEARCHEXP)/auto/$(FULLEXT)/.packlist \ + write $(DESTINSTALLSITEARCH)/auto/$(FULLEXT)/.packlist \ + $(INST_LIB) $(DESTINSTALLSITELIB) \ + $(INST_ARCHLIB) $(DESTINSTALLSITEARCH) \ + $(INST_BIN) $(DESTINSTALLSITEBIN) \ + $(INST_SCRIPT) $(DESTINSTALLSITESCRIPT) \ + $(INST_MAN1DIR) $(DESTINSTALLSITEMAN1DIR) \ + $(INST_MAN3DIR) $(DESTINSTALLSITEMAN3DIR) + $(NOECHO) $(WARN_IF_OLD_PACKLIST) \ + $(PERL_ARCHLIB)/auto/$(FULLEXT) + +pure_vendor_install :: all + $(NOECHO) umask 022; $(MOD_INSTALL) \ + $(INST_LIB) $(DESTINSTALLVENDORLIB) \ + $(INST_ARCHLIB) $(DESTINSTALLVENDORARCH) \ + $(INST_BIN) $(DESTINSTALLVENDORBIN) \ + $(INST_SCRIPT) $(DESTINSTALLVENDORSCRIPT) \ + $(INST_MAN1DIR) $(DESTINSTALLVENDORMAN1DIR) \ + $(INST_MAN3DIR) $(DESTINSTALLVENDORMAN3DIR) + +doc_perl_install :: all + +doc_site_install :: all + $(NOECHO) $(ECHO) Appending installation info to $(DESTINSTALLSITEARCH)/perllocal.pod + -$(NOECHO) umask 02; $(MKPATH) $(DESTINSTALLSITEARCH) + -$(NOECHO) umask 02; $(DOC_INSTALL) \ + "Module" "$(NAME)" \ + "installed into" "$(INSTALLSITELIB)" \ + LINKTYPE "$(LINKTYPE)" \ + VERSION "$(VERSION)" \ + EXE_FILES "$(EXE_FILES)" \ + >> $(DESTINSTALLSITEARCH)/perllocal.pod + +doc_vendor_install :: all + + +uninstall :: uninstall_from_$(INSTALLDIRS)dirs + $(NOECHO) $(NOOP) + +uninstall_from_perldirs :: + +uninstall_from_sitedirs :: + $(NOECHO) $(UNINSTALL) $(SITEARCHEXP)/auto/$(FULLEXT)/.packlist + +uninstall_from_vendordirs :: + + + +# --- MakeMaker force section: +# Phony target to force checking subdirectories. +FORCE : + $(NOECHO) $(NOOP) + + +# --- MakeMaker perldepend section: + +PERL_HDRS = \ + $(PERL_INC)/EXTERN.h \ + $(PERL_INC)/INTERN.h \ + $(PERL_INC)/XSUB.h \ + $(PERL_INC)/av.h \ + $(PERL_INC)/cc_runtime.h \ + $(PERL_INC)/config.h \ + $(PERL_INC)/cop.h \ + $(PERL_INC)/cv.h \ + $(PERL_INC)/dosish.h \ + $(PERL_INC)/embed.h \ + $(PERL_INC)/embedvar.h \ + $(PERL_INC)/fakethr.h \ + $(PERL_INC)/form.h \ + $(PERL_INC)/gv.h \ + $(PERL_INC)/handy.h \ + $(PERL_INC)/hv.h \ + $(PERL_INC)/intrpvar.h \ + $(PERL_INC)/iperlsys.h \ + $(PERL_INC)/keywords.h \ + $(PERL_INC)/mg.h \ + $(PERL_INC)/nostdio.h \ + $(PERL_INC)/op.h \ + $(PERL_INC)/opcode.h \ + $(PERL_INC)/patchlevel.h \ + $(PERL_INC)/perl.h \ + $(PERL_INC)/perlio.h \ + $(PERL_INC)/perlsdio.h \ + $(PERL_INC)/perlsfio.h \ + $(PERL_INC)/perlvars.h \ + $(PERL_INC)/perly.h \ + $(PERL_INC)/pp.h \ + $(PERL_INC)/pp_proto.h \ + $(PERL_INC)/proto.h \ + $(PERL_INC)/regcomp.h \ + $(PERL_INC)/regexp.h \ + $(PERL_INC)/regnodes.h \ + $(PERL_INC)/scope.h \ + $(PERL_INC)/sv.h \ + $(PERL_INC)/thread.h \ + $(PERL_INC)/unixish.h \ + $(PERL_INC)/util.h + +$(OBJECT) : $(PERL_HDRS) + + +# --- MakeMaker makefile section: + +$(OBJECT) : $(FIRST_MAKEFILE) + +# We take a very conservative approach here, but it's worth it. +# We move Makefile to Makefile.old here to avoid gnu make looping. +$(FIRST_MAKEFILE) : Makefile.PL $(CONFIGDEP) + $(NOECHO) $(ECHO) "Makefile out-of-date with respect to $?" + $(NOECHO) $(ECHO) "Cleaning current config before rebuilding Makefile..." + -$(NOECHO) $(RM_F) $(MAKEFILE_OLD) + -$(NOECHO) $(MV) $(FIRST_MAKEFILE) $(MAKEFILE_OLD) + - $(MAKE) $(USEMAKEFILE) $(MAKEFILE_OLD) clean $(DEV_NULL) + $(PERLRUN) Makefile.PL + $(NOECHO) $(ECHO) "==> Your Makefile has been rebuilt. <==" + $(NOECHO) $(ECHO) "==> Please rerun the $(MAKE) command. <==" + $(FALSE) + + + +# --- MakeMaker staticmake section: + +# --- MakeMaker makeaperl section --- +MAP_TARGET = perl +FULLPERL = /usr/bin/perl + +$(MAP_TARGET) :: static $(MAKE_APERL_FILE) + $(MAKE) $(USEMAKEFILE) $(MAKE_APERL_FILE) $@ + +$(MAKE_APERL_FILE) : $(FIRST_MAKEFILE) pm_to_blib + $(NOECHO) $(ECHO) Writing \"$(MAKE_APERL_FILE)\" for this $(MAP_TARGET) + $(NOECHO) $(PERLRUNINST) \ + Makefile.PL DIR= \ + MAKEFILE=$(MAKE_APERL_FILE) LINKTYPE=static \ + MAKEAPERL=1 NORECURS=1 CCCDLFLAGS= + + +# --- MakeMaker test section: + +TEST_VERBOSE=0 +TEST_TYPE=test_$(LINKTYPE) +TEST_FILE = test.pl +TEST_FILES = +TESTDB_SW = -d + +testdb :: testdb_$(LINKTYPE) + +test :: $(TEST_TYPE) subdirs-test + +subdirs-test :: + $(NOECHO) $(NOOP) + + +test_dynamic :: pure_all + PERL_DL_NONLAZY=1 $(FULLPERLRUN) "-I$(INST_LIB)" "-I$(INST_ARCHLIB)" $(TEST_FILE) + +testdb_dynamic :: pure_all + PERL_DL_NONLAZY=1 $(FULLPERLRUN) $(TESTDB_SW) "-I$(INST_LIB)" "-I$(INST_ARCHLIB)" $(TEST_FILE) + +test_ : test_dynamic + +test_static :: pure_all $(MAP_TARGET) + PERL_DL_NONLAZY=1 ./$(MAP_TARGET) "-I$(INST_LIB)" "-I$(INST_ARCHLIB)" $(TEST_FILE) + +testdb_static :: pure_all $(MAP_TARGET) + PERL_DL_NONLAZY=1 ./$(MAP_TARGET) $(TESTDB_SW) "-I$(INST_LIB)" "-I$(INST_ARCHLIB)" $(TEST_FILE) + + + +# --- MakeMaker ppd section: +# Creates a PPD (Perl Package Description) for a binary distribution. +ppd : + $(NOECHO) $(ECHO) '' > $(DISTNAME).ppd + $(NOECHO) $(ECHO) ' ' >> $(DISTNAME).ppd + $(NOECHO) $(ECHO) ' ' >> $(DISTNAME).ppd + $(NOECHO) $(ECHO) ' ' >> $(DISTNAME).ppd + $(NOECHO) $(ECHO) ' ' >> $(DISTNAME).ppd + $(NOECHO) $(ECHO) ' ' >> $(DISTNAME).ppd + $(NOECHO) $(ECHO) ' ' >> $(DISTNAME).ppd + $(NOECHO) $(ECHO) '' >> $(DISTNAME).ppd + + +# --- MakeMaker pm_to_blib section: + +pm_to_blib : $(FIRST_MAKEFILE) $(TO_INST_PM) + $(NOECHO) $(ABSPERLRUN) -MExtUtils::Install -e 'pm_to_blib({@ARGV}, '\''$(INST_LIB)/auto'\'', q[$(PM_FILTER)], '\''$(PERM_DIR)'\'')' -- \ + CRFPP.pm $(INST_LIB)/CRFPP.pm + $(NOECHO) $(TOUCH) pm_to_blib + + +# --- MakeMaker selfdocument section: + + +# --- MakeMaker postamble section: + + +# End. diff --git a/CRF/ruby/Makefile b/CRF/ruby/Makefile index f640229a1230c6214f2d7022d1adadf68a9a5d7c..4825f4468813527d53efd7191d2c818e2820ab72 100644 --- a/CRF/ruby/Makefile +++ b/CRF/ruby/Makefile @@ -1,157 +1,157 @@ - -SHELL = /bin/sh - -#### Start of system configuration section. #### - -srcdir = . -topdir = /usr/lib/ruby/1.8/x86_64-linux -hdrdir = $(topdir) -VPATH = $(srcdir):$(topdir):$(hdrdir) -exec_prefix = $(prefix) -prefix = $(DESTDIR)/usr -sharedstatedir = $(prefix)/com -mandir = $(prefix)/share/man -psdir = $(docdir) -oldincludedir = $(DESTDIR)/usr/include -localedir = $(datarootdir)/locale -bindir = $(exec_prefix)/bin -libexecdir = $(prefix)/lib/ruby1.8 -sitedir = $(DESTDIR)/usr/local/lib/site_ruby -htmldir = $(docdir) -vendorarchdir = $(vendorlibdir)/$(sitearch) -includedir = $(prefix)/include -infodir = $(prefix)/share/info -vendorlibdir = $(vendordir)/$(ruby_version) -sysconfdir = $(DESTDIR)/etc -libdir = $(exec_prefix)/lib -sbindir = $(exec_prefix)/sbin -rubylibdir = $(libdir)/ruby/$(ruby_version) -docdir = $(datarootdir)/doc/$(PACKAGE) -dvidir = $(docdir) -vendordir = $(libdir)/ruby/vendor_ruby -datarootdir = $(prefix)/share -pdfdir = $(docdir) -archdir = $(rubylibdir)/$(arch) -sitearchdir = $(sitelibdir)/$(sitearch) -datadir = $(datarootdir) -localstatedir = $(DESTDIR)/var -sitelibdir = $(sitedir)/$(ruby_version) - -CC = gcc -LIBRUBY = $(LIBRUBY_SO) -LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a -LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME) -LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static - -RUBY_EXTCONF_H = -CFLAGS = -fPIC -fno-strict-aliasing -g -g -O2 -fPIC $(cflags) -INCFLAGS = -I. -I. -I/usr/lib/ruby/1.8/x86_64-linux -I. -DEFS = -CPPFLAGS = -DHAVE_CRFPP_H -CXXFLAGS = $(CFLAGS) -ldflags = -L. -Wl,-Bsymbolic-functions -rdynamic -Wl,-export-dynamic -dldflags = -archflag = -DLDFLAGS = $(ldflags) $(dldflags) $(archflag) -LDSHARED = $(CC) -shared -AR = ar -EXEEXT = - -RUBY_INSTALL_NAME = ruby1.8 -RUBY_SO_NAME = ruby1.8 -arch = x86_64-linux -sitearch = x86_64-linux -ruby_version = 1.8 -ruby = /usr/bin/ruby1.8 -RUBY = $(ruby) -RM = rm -f -MAKEDIRS = mkdir -p -INSTALL = /usr/bin/install -c -INSTALL_PROG = $(INSTALL) -m 0755 -INSTALL_DATA = $(INSTALL) -m 644 -COPY = cp - -#### End of system configuration section. #### - -preload = - -libpath = . $(libdir) -LIBPATH = -L. -L$(libdir) -DEFFILE = - -CLEANFILES = mkmf.log -DISTCLEANFILES = - -extout = -extout_prefix = -target_prefix = -LOCAL_LIBS = -LIBS = $(LIBRUBYARG_SHARED) -lpthread -lcrfpp -lpthread -lrt -ldl -lcrypt -lm -lc -SRCS = CRFPP_wrap.cpp -OBJS = CRFPP_wrap.o -TARGET = CRFPP -DLLIB = $(TARGET).so -EXTSTATIC = -STATIC_LIB = - -BINDIR = $(bindir) -RUBYCOMMONDIR = $(sitedir)$(target_prefix) -RUBYLIBDIR = $(sitelibdir)$(target_prefix) -RUBYARCHDIR = $(sitearchdir)$(target_prefix) - -TARGET_SO = $(DLLIB) -CLEANLIBS = $(TARGET).so $(TARGET).il? $(TARGET).tds $(TARGET).map -CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak - -all: $(DLLIB) -static: $(STATIC_LIB) - -clean: - @-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) - -distclean: clean - @-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log - @-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES) - -realclean: distclean -install: install-so install-rb - -install-so: $(RUBYARCHDIR) -install-so: $(RUBYARCHDIR)/$(DLLIB) -$(RUBYARCHDIR)/$(DLLIB): $(DLLIB) - $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR) -install-rb: pre-install-rb install-rb-default -install-rb-default: pre-install-rb-default -pre-install-rb: Makefile -pre-install-rb-default: Makefile -$(RUBYARCHDIR): - $(MAKEDIRS) $@ - -site-install: site-install-so site-install-rb -site-install-so: install-so -site-install-rb: install-rb - -.SUFFIXES: .c .m .cc .cxx .cpp .C .o - -.cc.o: - $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $< - -.cxx.o: - $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $< - -.cpp.o: - $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $< - -.C.o: - $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $< - -.c.o: - $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $< - -$(DLLIB): $(OBJS) Makefile - @-$(RM) $@ - $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS) - - - -$(OBJS): ruby.h defines.h + +SHELL = /bin/sh + +#### Start of system configuration section. #### + +srcdir = . +topdir = /usr/lib/ruby/1.8/x86_64-linux +hdrdir = $(topdir) +VPATH = $(srcdir):$(topdir):$(hdrdir) +exec_prefix = $(prefix) +prefix = $(DESTDIR)/usr +sharedstatedir = $(prefix)/com +mandir = $(prefix)/share/man +psdir = $(docdir) +oldincludedir = $(DESTDIR)/usr/include +localedir = $(datarootdir)/locale +bindir = $(exec_prefix)/bin +libexecdir = $(prefix)/lib/ruby1.8 +sitedir = $(DESTDIR)/usr/local/lib/site_ruby +htmldir = $(docdir) +vendorarchdir = $(vendorlibdir)/$(sitearch) +includedir = $(prefix)/include +infodir = $(prefix)/share/info +vendorlibdir = $(vendordir)/$(ruby_version) +sysconfdir = $(DESTDIR)/etc +libdir = $(exec_prefix)/lib +sbindir = $(exec_prefix)/sbin +rubylibdir = $(libdir)/ruby/$(ruby_version) +docdir = $(datarootdir)/doc/$(PACKAGE) +dvidir = $(docdir) +vendordir = $(libdir)/ruby/vendor_ruby +datarootdir = $(prefix)/share +pdfdir = $(docdir) +archdir = $(rubylibdir)/$(arch) +sitearchdir = $(sitelibdir)/$(sitearch) +datadir = $(datarootdir) +localstatedir = $(DESTDIR)/var +sitelibdir = $(sitedir)/$(ruby_version) + +CC = gcc +LIBRUBY = $(LIBRUBY_SO) +LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a +LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME) +LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static + +RUBY_EXTCONF_H = +CFLAGS = -fPIC -fno-strict-aliasing -g -g -O2 -fPIC $(cflags) +INCFLAGS = -I. -I. -I/usr/lib/ruby/1.8/x86_64-linux -I. +DEFS = +CPPFLAGS = -DHAVE_CRFPP_H +CXXFLAGS = $(CFLAGS) +ldflags = -L. -Wl,-Bsymbolic-functions -rdynamic -Wl,-export-dynamic +dldflags = +archflag = +DLDFLAGS = $(ldflags) $(dldflags) $(archflag) +LDSHARED = $(CC) -shared +AR = ar +EXEEXT = + +RUBY_INSTALL_NAME = ruby1.8 +RUBY_SO_NAME = ruby1.8 +arch = x86_64-linux +sitearch = x86_64-linux +ruby_version = 1.8 +ruby = /usr/bin/ruby1.8 +RUBY = $(ruby) +RM = rm -f +MAKEDIRS = mkdir -p +INSTALL = /usr/bin/install -c +INSTALL_PROG = $(INSTALL) -m 0755 +INSTALL_DATA = $(INSTALL) -m 644 +COPY = cp + +#### End of system configuration section. #### + +preload = + +libpath = . $(libdir) +LIBPATH = -L. -L$(libdir) +DEFFILE = + +CLEANFILES = mkmf.log +DISTCLEANFILES = + +extout = +extout_prefix = +target_prefix = +LOCAL_LIBS = +LIBS = $(LIBRUBYARG_SHARED) -lpthread -lcrfpp -lpthread -lrt -ldl -lcrypt -lm -lc +SRCS = CRFPP_wrap.cpp +OBJS = CRFPP_wrap.o +TARGET = CRFPP +DLLIB = $(TARGET).so +EXTSTATIC = +STATIC_LIB = + +BINDIR = $(bindir) +RUBYCOMMONDIR = $(sitedir)$(target_prefix) +RUBYLIBDIR = $(sitelibdir)$(target_prefix) +RUBYARCHDIR = $(sitearchdir)$(target_prefix) + +TARGET_SO = $(DLLIB) +CLEANLIBS = $(TARGET).so $(TARGET).il? $(TARGET).tds $(TARGET).map +CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak + +all: $(DLLIB) +static: $(STATIC_LIB) + +clean: + @-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) + +distclean: clean + @-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log + @-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES) + +realclean: distclean +install: install-so install-rb + +install-so: $(RUBYARCHDIR) +install-so: $(RUBYARCHDIR)/$(DLLIB) +$(RUBYARCHDIR)/$(DLLIB): $(DLLIB) + $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR) +install-rb: pre-install-rb install-rb-default +install-rb-default: pre-install-rb-default +pre-install-rb: Makefile +pre-install-rb-default: Makefile +$(RUBYARCHDIR): + $(MAKEDIRS) $@ + +site-install: site-install-so site-install-rb +site-install-so: install-so +site-install-rb: install-rb + +.SUFFIXES: .c .m .cc .cxx .cpp .C .o + +.cc.o: + $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $< + +.cxx.o: + $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $< + +.cpp.o: + $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $< + +.C.o: + $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $< + +.c.o: + $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $< + +$(DLLIB): $(OBJS) Makefile + @-$(RM) $@ + $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS) + + + +$(OBJS): ruby.h defines.h diff --git a/CRF/winmain.h b/CRF/winmain.h index 464494b8855dfc7a0dbd7ae45220b4b1071a5abc..74d3a02cbca39dd7b898be54cfe34e778e8a8e94 100644 --- a/CRF/winmain.h +++ b/CRF/winmain.h @@ -1,69 +1,69 @@ -// -// CRF++ -- Yet Another CRF toolkit -// -// $Id: common.h 1588 2007-02-12 09:03:39Z taku $; -// -// Copyright(C) 2005-2007 Taku Kudo -// -#if defined(_WIN32) || defined(__CYGWIN__) - -#include -#include - -namespace { -class CommandLine { - public: - CommandLine(int argc, wchar_t **argv) : argc_(argc), argv_(0) { - argv_ = new char * [argc_]; - for (int i = 0; i < argc_; ++i) { - const std::string arg = WideToUtf8(argv[i]); - argv_[i] = new char[arg.size() + 1]; - ::memcpy(argv_[i], arg.data(), arg.size()); - argv_[i][arg.size()] = '\0'; - } - } - ~CommandLine() { - for (int i = 0; i < argc_; ++i) { - delete [] argv_[i]; - } - delete [] argv_; - } - - int argc() const { return argc_; } - char **argv() const { return argv_; } - - private: - static std::string WideToUtf8(const std::wstring &input) { - const int output_length = ::WideCharToMultiByte(CP_UTF8, 0, - input.c_str(), -1, NULL, 0, - NULL, NULL); - if (output_length == 0) { - return ""; - } - - char *input_encoded = new char[output_length + 1]; - const int result = ::WideCharToMultiByte(CP_UTF8, 0, input.c_str(), -1, - input_encoded, - output_length + 1, NULL, NULL); - std::string output; - if (result > 0) { - output.assign(input_encoded); - } - delete [] input_encoded; - return output; - } - - int argc_; - char **argv_; -}; -} // namespace - -#define main(argc, argv) wmain_to_main_wrapper(argc, argv) - -int wmain_to_main_wrapper(int argc, char **argv); - -int wmain(int argc, wchar_t **argv) { - CommandLine cmd(argc, argv); - return wmain_to_main_wrapper(cmd.argc(), cmd.argv()); -} -#endif +// +// CRF++ -- Yet Another CRF toolkit +// +// $Id: common.h 1588 2007-02-12 09:03:39Z taku $; +// +// Copyright(C) 2005-2007 Taku Kudo +// +#if defined(_WIN32) || defined(__CYGWIN__) + +#include +#include + +namespace { +class CommandLine { + public: + CommandLine(int argc, wchar_t **argv) : argc_(argc), argv_(0) { + argv_ = new char * [argc_]; + for (int i = 0; i < argc_; ++i) { + const std::string arg = WideToUtf8(argv[i]); + argv_[i] = new char[arg.size() + 1]; + ::memcpy(argv_[i], arg.data(), arg.size()); + argv_[i][arg.size()] = '\0'; + } + } + ~CommandLine() { + for (int i = 0; i < argc_; ++i) { + delete [] argv_[i]; + } + delete [] argv_; + } + + int argc() const { return argc_; } + char **argv() const { return argv_; } + + private: + static std::string WideToUtf8(const std::wstring &input) { + const int output_length = ::WideCharToMultiByte(CP_UTF8, 0, + input.c_str(), -1, NULL, 0, + NULL, NULL); + if (output_length == 0) { + return ""; + } + + char *input_encoded = new char[output_length + 1]; + const int result = ::WideCharToMultiByte(CP_UTF8, 0, input.c_str(), -1, + input_encoded, + output_length + 1, NULL, NULL); + std::string output; + if (result > 0) { + output.assign(input_encoded); + } + delete [] input_encoded; + return output; + } + + int argc_; + char **argv_; +}; +} // namespace + +#define main(argc, argv) wmain_to_main_wrapper(argc, argv) + +int wmain_to_main_wrapper(int argc, char **argv); + +int wmain(int argc, wchar_t **argv) { + CommandLine cmd(argc, argv); + return wmain_to_main_wrapper(cmd.argc(), cmd.argv()); +} +#endif diff --git a/GeneNER_SpeAss_run.py b/GeneNER_SpeAss_run.py index b77484e9359aaa9447bba0fa0c9b8dba03ea073f..82aece37971c4f6f56eea1043d4f7f6e03b6a1b5 100755 --- a/GeneNER_SpeAss_run.py +++ b/GeneNER_SpeAss_run.py @@ -1,746 +1,746 @@ -# -*- coding: utf-8 -*- -""" -Created on Wed Jun 8 09:26:57 2022 - -@author: luol2 - -Pipeline: first gene NER, then species assignment -input: species NER bioc xml file -output: gene ner and species assignment results bioc xml file -""" -import argparse -import os -import io -import time -import sys -import re -import shutil -from src_python.GeneNER import model_ner,ner_tag -from src_python.SpeAss import model_sa,sa_tag - -import tensorflow as tf - -import bioc -import stanza -nlp_token = stanza.Pipeline(model_dir='gnorm_trained_models/stanza', lang='en', processors={'tokenize': 'spacy'},package='None', download_method=None) #package='craft' ;./gnorm_trained_models/stanza - -def NER_BioC(infolder,infile,outpath,nn_model): - - with open(infolder+"/"+infile, 'r',encoding='utf-8') as fin: - with open(outpath+"/"+infile,'w', encoding='utf8') as fout: - collection = bioc.load(fin) - - Total_n=len(collection.documents) - print('Total number of sub-documents:', Total_n) - pmid_n=0 - for document in collection.documents: - print("Processing:{0}%".format(round(pmid_n * 100 / Total_n)), end="\r") - pmid_n+=1 - # print(document.id) - mention_num_new=0 - for passage in document.passages: - if passage.text!='' and (not passage.text.isspace()) and passage.infons['type']!='ref': # have text and is not ref - passage_offset=passage.offset - tag_result=ner_tag.ML_Tag(passage.text,nn_model,nlp_token) - mention_num=0 - for ele in tag_result: - bioc_note = bioc.BioCAnnotation() - bioc_note.id = str(mention_num) - mention_num+=1 - bioc_note.infons['type'] = ele[2] - start = int(ele[0]) - last = int(ele[1]) - loc = bioc.BioCLocation(offset=str(passage_offset+start), length= str(last-start)) - bioc_note.locations.append(loc) - bioc_note.text = passage.text[start:last] - passage.annotations.append(bioc_note) - #update id - for temp_annotation in passage.annotations: - temp_annotation.id=str(mention_num_new) - mention_num_new+=1 - bioc.dump(collection, fout, pretty_print=True) - -def NER_PubTator(infolder,infile,outpath,nn_model): - with open(infolder+"/"+infile, 'r',encoding='utf-8') as fin: - with open(outpath+"/"+infile,'w', encoding='utf-8') as fout: - title='' - abstract='' - all_text=fin.read().strip().split('\n\n') - Total_n=len(all_text) - print('Total number of sub-documents:', Total_n) - pmid_n=0 - for doc in all_text: - print("Processing:{0}%".format(round(pmid_n * 100 / Total_n)), end="\r") - pmid_n+=1 - lines = doc.split('\n') - seg=lines[0].split('|t|') - pmid=seg[0] - title="" - if len(seg)>1: - title=seg[1] - abstract="" - if len(lines)>1: - seg=lines[1].split('|a|') - abstract=seg[1] - if len(seg)>1: - abstract=seg[1] - - intext=title+' '+abstract - tag_result=ner_tag.ML_Tag(intext,nn_model,nlp_token) - fout.write(doc+'\n') - for ele in tag_result: - ent_start = ele[0] - ent_last = ele[1] - ent_mention = intext[int(ele[0]):int(ele[1])] - ent_type=ele[2] - fout.write(pmid+"\t"+ent_start+"\t"+ent_last+"\t"+ent_mention+"\t"+ent_type+"\n") - fout.write('\n') - title='' - abstract='' - -def geneNER(infolder, outpath, modelfile): - - print('loading NER models........') - - if modelfile.lower().find('bioformer')>=0: - vocabfiles={'labelfile':'./vocab/GeneNER_label.vocab', - 'checkpoint_path':'./gnorm_trained_models/bioformer-cased-v1.0/', #bioformer-cased-v1.0 - 'lowercase':False, - } - else: - vocabfiles={'labelfile':'./vocab/GeneNER_label.vocab', - 'checkpoint_path':'./gnorm_trained_models/BiomedNLP-PubMedBERT-base-uncased-abstract/', - 'lowercase':True, - } - - nn_model=model_ner.HUGFACE_NER(vocabfiles) - nn_model.build_encoder() - nn_model.build_softmax_decoder() - nn_model.load_model(modelfile) - - #tagging text - print("begin GeneNER tagging........") - start_time=time.time() - - for infile in os.listdir(infolder): - if os.path.isfile(outpath+"/"+infile): - print(infile+' has exsited.') - else: - print('processing:',infile) - fin = open(infolder+"/"+infile, 'r',encoding='utf-8') - input_format="" - for line in fin: - pattern_bioc = re.compile('.*.*') - pattern_pubtator = re.compile('^([^\|]+)\|[^\|]+\|(.*)') - if pattern_bioc.search(line): - input_format="BioC" - break - elif pattern_pubtator.search(line): - input_format="PubTator" - break - fin.close() - if(input_format == "PubTator"): - NER_PubTator(infolder,infile,outpath,nn_model) - elif(input_format == "BioC"): - NER_BioC(infolder,infile,outpath,nn_model) - - print('tag done:',time.time()-start_time) - - -#SA for bioc format -def SA_BioC(infolder,infile,outpath,nn_model,virus_set,prefix_dict): - - #BioC xml to pubtator - # pmid|t|text1 - #pmid|a|text2 - #pmid sid eid entity_txt entity_type entity_id (gene is blank) - fin = open(infolder+"/"+infile, 'r',encoding='utf-8') - # fout_pubtator=open(outpath+'tmp/input_xml.pubtator','w', encoding='utf-8') - fin_pubtator0=io.StringIO() #none *species - fin_pubtator1=io.StringIO() #one *species - fin_pubtator2=io.StringIO() #two or more species - collection = bioc.load(fin) - fin.close() - ori_ann_index={} #{'pmid':{'ent.id':'ent_s-ent_e'}} - species_count={} #{pmid:{speid:num}} - gene_set=['Gene','FamilyName'] - final_sa_results={} #{'pmid':{'entity_id':species_id}} - for document in collection.documents: - doc_pmid=document.id - doc_title='' - doc_abstract='' - doc_annotation=[] - _ann_index={} - _species_num={} #{*speciesid:num} - _gene_num=0 - _passage_num=0 - if len(document.passages)<=2: #abstract xml or PMC only have title - for passage in document.passages: - passage_offset=passage.offset - _passage_num+=1 - #print(passage_offset,type(passage_offset)) - #if passage.infons['type']=='title' or passage.infons['type']=='front': - if _passage_num==1: - doc_title=passage.text - for temp_annotation in passage.annotations: - if temp_annotation.infons['type'] in gene_set: - _gene_num+=1 - ent_start=temp_annotation.locations[0].offset-passage_offset - ent_end=ent_start+temp_annotation.locations[0].length - #print(ent_start,ent_end) - _ann_index[temp_annotation.id]=str(ent_start)+'-'+str(ent_end) - # print(temp_annotation.infons) - if 'Identifier' in temp_annotation.infons.keys(): - # print(temp_annotation.infons.keys['Identifier']) - species_ID=temp_annotation.infons['Identifier'] - if species_ID.find('*')>=0: - if species_ID not in _species_num.keys(): - _species_num[species_ID]=1 - else: - _species_num[species_ID]+=1 - doc_annotation.append(doc_pmid+'\t'+temp_annotation.id+'\t'+str(ent_start)+'\t'+str(ent_end)+'\t'+temp_annotation.text+'\t'+temp_annotation.infons['type']+'\t'+species_ID) - else: - doc_annotation.append(doc_pmid+'\t'+temp_annotation.id+'\t'+str(ent_start)+'\t'+str(ent_end)+'\t'+temp_annotation.text+'\t'+temp_annotation.infons['type']) - - #elif passage.infons['type']=='abstract' or passage.infons['type']=='paragraph': - else: - doc_abstract=passage.text - for temp_annotation in passage.annotations: - if temp_annotation.infons['type'] in gene_set: - _gene_num+=1 - ent_start=len(doc_title)+1+temp_annotation.locations[0].offset-passage_offset - ent_end=ent_start+temp_annotation.locations[0].length - #print(ent_start,ent_end) - _ann_index[temp_annotation.id]=str(ent_start)+'-'+str(ent_end) - if 'Identifier' in temp_annotation.infons.keys(): - # print(temp_annotation.infons.keys['Identifier']) - species_ID=temp_annotation.infons['Identifier'] - if species_ID.find('*')>=0: - if species_ID not in _species_num.keys(): - _species_num[species_ID]=1 - else: - _species_num[species_ID]+=1 - doc_annotation.append(doc_pmid+'\t'+temp_annotation.id+'\t'+str(ent_start)+'\t'+str(ent_end)+'\t'+temp_annotation.text+'\t'+temp_annotation.infons['type']+'\t'+species_ID) - else: - doc_annotation.append(doc_pmid+'\t'+temp_annotation.id+'\t'+str(ent_start)+'\t'+str(ent_end)+'\t'+temp_annotation.text+'\t'+temp_annotation.infons['type']) - - if len(_species_num)>=2 and _gene_num>0: - fin_pubtator2.write(doc_pmid+'|t|'+doc_title+'\n') - fin_pubtator2.write(doc_pmid+'|a|'+doc_abstract+'\n') - for ele in doc_annotation: - fin_pubtator2.write(ele+'\n') - fin_pubtator2.write('\n') - elif len(_species_num)==1 and _gene_num>0: #可以直接给结果 - fin_pubtator1.write(doc_pmid+'|t|'+doc_title+'\n') - fin_pubtator1.write(doc_pmid+'|a|'+doc_abstract+'\n') - major_speicesid,=_species_num - fin_pubtator1.write(major_speicesid[1:]+'\n') - for ele in doc_annotation: - fin_pubtator1.write(ele+'\n') - fin_pubtator1.write('\n') - elif len(_species_num)==0 and _gene_num>0: - fin_pubtator0.write(doc_pmid+'|t|'+doc_title+'\n') - fin_pubtator0.write(doc_pmid+'|a|'+doc_abstract+'\n') - for ele in doc_annotation: - fin_pubtator0.write(ele+'\n') - fin_pubtator0.write('\n') - - else: # full text xml - for passage in document.passages: - passage_annotation=[] - _species_num_passage={} - _gene_num_passage=0 - passage_offset=passage.offset - #print(passage_offset,type(passage_offset)) - if passage.text!='' and (not passage.text.isspace()) and passage.infons['type']!='ref': - doc_title=passage.text - for temp_annotation in passage.annotations: - if temp_annotation.infons['type'] in gene_set: - _gene_num_passage+=1 - ent_start=temp_annotation.locations[0].offset-passage_offset - ent_end=ent_start+temp_annotation.locations[0].length - #print(ent_start,ent_end) - _ann_index[temp_annotation.id]=str(ent_start)+'-'+str(ent_end) - # print(temp_annotation.infons) - if 'Identifier' in temp_annotation.infons.keys(): - # print(temp_annotation.infons.keys['Identifier']) - species_ID=temp_annotation.infons['Identifier'] - if species_ID.find('*')>=0: - if species_ID not in _species_num.keys(): - _species_num[species_ID]=1 - else: - _species_num[species_ID]+=1 - if species_ID not in _species_num_passage.keys(): - _species_num_passage[species_ID]=1 - else: - _species_num_passage[species_ID]+=1 - passage_annotation.append(doc_pmid+'\t'+temp_annotation.id+'\t'+str(ent_start)+'\t'+str(ent_end)+'\t'+temp_annotation.text+'\t'+temp_annotation.infons['type']+'\t'+species_ID) - else: - passage_annotation.append(doc_pmid+'\t'+temp_annotation.id+'\t'+str(ent_start)+'\t'+str(ent_end)+'\t'+temp_annotation.text+'\t'+temp_annotation.infons['type']) - - - if len(_species_num_passage)>=2 and _gene_num_passage>0: - fin_pubtator2.write(doc_pmid+'|t|'+doc_title+'\n') - fin_pubtator2.write(doc_pmid+'|a|'+doc_abstract+'\n') - for ele in passage_annotation: - fin_pubtator2.write(ele+'\n') - fin_pubtator2.write('\n') - elif len(_species_num_passage)==1 and _gene_num_passage>0: #可以直接给结果 - fin_pubtator1.write(doc_pmid+'|t|'+doc_title+'\n') - fin_pubtator1.write(doc_pmid+'|a|'+doc_abstract+'\n') - major_speicesid,=_species_num_passage - fin_pubtator1.write(major_speicesid[1:]+'\n') - for ele in passage_annotation: - fin_pubtator1.write(ele+'\n') - fin_pubtator1.write('\n') - elif len(_species_num_passage)==0 and _gene_num_passage>0: - fin_pubtator0.write(doc_pmid+'|t|'+doc_title+'\n') - fin_pubtator0.write(doc_pmid+'|a|'+doc_abstract+'\n') - for ele in passage_annotation: - fin_pubtator0.write(ele+'\n') - fin_pubtator0.write('\n') - # print(ori_ann_index) - - ori_ann_index[doc_pmid]=_ann_index - species_count[doc_pmid]=_species_num - - - cache_geneid={} #{pmid:{gene1:{id1:num,id2:num}}} - - if fin_pubtator2.getvalue()!='': - #pubtator format ML tagging - # print(fin_pubtator2.getvalue()) - ml_out= sa_tag.ml_tag_main(fin_pubtator2,nlp_token, nn_model) - #print(ml_out.getvalue()) - fin_result=io.StringIO(ml_out.getvalue()) - all_in=fin_result.read().strip().split('\n\n') - #print('+2 species:',len(all_in)) - fin_result.close() - - prefix_speid_allset=set(prefix_dict.keys()) - - for doc in all_in: - lines=doc.split('\n') - pmid=lines[0].split('|t|')[0] - _prefix_str2id_dict={} - doc_species=list(species_count[pmid].keys()) - for _spe_ele in doc_species: - if _spe_ele[1:] in prefix_speid_allset: - for ele in prefix_dict[_spe_ele[1:]]: - _prefix_str2id_dict[ele]=_spe_ele[1:] - - for i in range(2,len(lines)): - segs=lines[i].split('\t') - if pmid not in final_sa_results.keys(): - final_sa_results[pmid]={segs[1]:'Focus:'+segs[-1]} - else: - final_sa_results[pmid][segs[1]]='Focus:'+segs[-1] - - if segs[5] in gene_set: - if segs[4][0:2] in _prefix_str2id_dict: #prefix rule - #print('prefix rule:', pmid) - # print(_prefix_str2id_dict) - if pmid not in final_sa_results.keys(): - final_sa_results[pmid]={segs[1]:'Focus:'+_prefix_str2id_dict[segs[4][0:2]]} - else: - final_sa_results[pmid][segs[1]]='Focus:'+_prefix_str2id_dict[segs[4][0:2]] - if pmid not in cache_geneid.keys(): - cache_geneid[pmid]={segs[4]:{'Focus:'+segs[-1]:1}} - else: - if segs[4] not in cache_geneid[pmid].keys(): - cache_geneid[pmid][segs[4]]={'Focus:'+segs[-1]:1} - else: - if segs[-1] not in cache_geneid[pmid][segs[4]].keys(): - cache_geneid[pmid][segs[4]]['Focus:'+segs[-1]]=1 - else: - cache_geneid[pmid][segs[4]]['Focus:'+segs[-1]]+=1 - - #print(final_sa_results) - - #one species - if fin_pubtator1.getvalue()!='': - fin_result=io.StringIO(fin_pubtator1.getvalue()) - all_in=fin_result.read().strip().split('\n\n') - fin_result.close() - #print('1 species:',len(all_in)) - for doc in all_in: - lines=doc.split('\n') - pmid=lines[0].split('|t|')[0] - major_speicesid=lines[2] - for i in range(3,len(lines)): - segs=lines[i].split('\t') - if len(segs)>=7:#species - if pmid not in final_sa_results.keys(): - final_sa_results[pmid]={segs[1]:segs[-1]} - else: - final_sa_results[pmid][segs[1]]=segs[-1] - else:#gene - marjor_species='Focus:'+major_speicesid - if pmid not in final_sa_results.keys(): - final_sa_results[pmid]={segs[1]:marjor_species} - else: - final_sa_results[pmid][segs[1]]=marjor_species - if pmid not in cache_geneid.keys(): - cache_geneid[pmid]={segs[4]:{marjor_species:1}} - else: - if segs[4] not in cache_geneid[pmid].keys(): - cache_geneid[pmid][segs[4]]={marjor_species:1} - else: - if segs[-1] not in cache_geneid[pmid][segs[4]].keys(): - cache_geneid[pmid][segs[4]][marjor_species]=1 - else: - cache_geneid[pmid][segs[4]][marjor_species]+=1 - - - #no species - fin_result=io.StringIO(fin_pubtator0.getvalue()) - all_in=fin_result.read().strip().split('\n\n') - fin_result.close() - #print('no species:',len(all_in)) - for doc in all_in: - lines=doc.split('\n') - pmid=lines[0].split('|t|')[0] - - for i in range(2,len(lines)): - segs=lines[i].split('\t') - if (pmid in cache_geneid.keys()) and (segs[4] in cache_geneid[pmid].keys()):#same gene in doc - marjor_species = max(zip(cache_geneid[pmid][segs[4]].values(), cache_geneid[pmid][segs[4]].keys())) - if pmid not in final_sa_results.keys(): - final_sa_results[pmid]={segs[1]:marjor_species[1]} - else: - final_sa_results[pmid][segs[1]]=marjor_species[1] - else: #marjor species in doc - if (pmid in species_count.keys()) and len(species_count[pmid])>0:#marjor species in doc - marjor_species = max(zip(species_count[pmid].values(), species_count[pmid].keys())) - - if pmid not in final_sa_results.keys(): - final_sa_results[pmid]={segs[1]:'Focus:'+marjor_species[1][1:]} - else: - final_sa_results[pmid][segs[1]]='Focus:'+marjor_species[1][1:] - else:#no any species in doc,assign human - if pmid not in final_sa_results.keys(): - final_sa_results[pmid]={segs[1]:'Focus:9606'} - else: - final_sa_results[pmid][segs[1]]='Focus:9606' - - - - # print(final_sa_results) - fin = open(infolder+"/"+infile, 'r',encoding='utf-8') - fout_xml=open(outpath+"/"+infile,'w', encoding='utf8') - collection = bioc.load(fin) - for document in collection.documents: - doc_pmid=document.id - # print(final_sa_results[doc_pmid]) - # print(doc_pmid) - for passage in document.passages: - for temp_annotation in passage.annotations: - if 'Identifier' not in temp_annotation.infons.keys(): - if temp_annotation.id in final_sa_results[doc_pmid].keys(): - if final_sa_results[doc_pmid][temp_annotation.id][6:] in virus_set: - temp_annotation.infons['Identifier']=final_sa_results[doc_pmid][temp_annotation.id]+',9606' - # print('!!! virus:', doc_pmid) - else: - temp_annotation.infons['Identifier']=final_sa_results[doc_pmid][temp_annotation.id] - else: #same text bug - if (doc_pmid in cache_geneid.keys()) and (temp_annotation.text in cache_geneid[doc_pmid].keys()):#same gene in doc - marjor_species = max(zip(cache_geneid[doc_pmid][temp_annotation.text].values(), cache_geneid[doc_pmid][temp_annotation.text].keys())) - temp_annotation.infons['Identifier']=marjor_species[1] - else: - - temp_annotation.infons['Identifier']='Focus:9606' - bioc.dump(collection, fout_xml, pretty_print=True) - fin.close() - fout_xml.close() - - -#SA for PubTator format -def SA_PubTator(infolder,infile,outpath,nn_model,virus_set,prefix_dict): - - - # pmid|t|text1 - #pmid|a|text2 - #pmid entity_id sid eid entity_txt entity_type (gene is blank) - fin = open(infolder+"/"+infile, 'r',encoding='utf-8') - # fout_pubtator=open(outpath+'tmp/input_xml.pubtator','w', encoding='utf-8') - fin_pubtator2=io.StringIO() #two or more species - all_in_ori=fin.read().strip().split('\n\n') - fin.close() - species_gene_count={} #{pmid:{'spec':_species_num;'gene':_gene_num}} - gene_set=['Gene','FamilyName'] - ML_results={} #{'pmid':{'sid-eid':species_id}} - - prefix_speid_allset=set(prefix_dict.keys()) - - for document in all_in_ori: - lines=document.split('\n') - doc_pmid=lines[0].split('|t|')[0] - doc_title=lines[0].split('|t|')[1] - doc_abstract=lines[1].split('|a|')[1] - doc_annotation=[] - _species_num=set() #(*speciesid) - _gene_num=0 - _ML_gene_num=0 - _entity_num=0 - _prefix_str2id_dict={} #{prestr:id} - for i in range(2,len(lines)): - segs=lines[i].split('\t') - if segs[4] in gene_set: - _gene_num+=1 - if len(segs)>=6: #species - doc_annotation.append(segs[0]+'\t'+str(_entity_num)+'\t'+'\t'.join(segs[1:])) - species_ID=segs[-1] - if species_ID.find('*')>=0: - _species_num.add(species_ID) - if species_ID[1:] in prefix_speid_allset: - for ele in prefix_dict[species_ID[1:]]: - _prefix_str2id_dict[ele]=species_ID[1:] - else: #gene - if segs[3][0:2] in _prefix_str2id_dict:#prefix rule - if _prefix_str2id_dict[segs[3][0:2]] in virus_set: - doc_annotation.append(segs[0]+'\t'+str(_entity_num)+'\t'+'\t'.join(segs[1:])+'\tFocus:'+_prefix_str2id_dict[segs[3][0:2]]+',9606') - if doc_pmid not in ML_results.keys(): - ML_results[doc_pmid]={segs[1]+'-'+segs[2]:_prefix_str2id_dict[segs[3][0:2]]+',9606'} - else: - ML_results[doc_pmid][segs[1]+'-'+segs[2]]=_prefix_str2id_dict[segs[3][0:2]]+',9606' - - # print('!!! prefixr and virus:', doc_pmid) - else: - doc_annotation.append(segs[0]+'\t'+str(_entity_num)+'\t'+'\t'.join(segs[1:])+'\tFocus:'+_prefix_str2id_dict[segs[3][0:2]]) - if doc_pmid not in ML_results.keys(): - ML_results[doc_pmid]={segs[1]+'-'+segs[2]:_prefix_str2id_dict[segs[3][0:2]]} - else: - ML_results[doc_pmid][segs[1]+'-'+segs[2]]=_prefix_str2id_dict[segs[3][0:2]] - # print('prefix rule!!',_prefix_str2id_dict) - # print(doc_pmid) - else: - doc_annotation.append(segs[0]+'\t'+str(_entity_num)+'\t'+'\t'.join(segs[1:])) - if segs[4] in gene_set: - _ML_gene_num+=1 - _entity_num+=1 - - if len(_species_num)>=2 and _ML_gene_num>0: - fin_pubtator2.write(doc_pmid+'|t|'+doc_title+'\n') - fin_pubtator2.write(doc_pmid+'|a|'+doc_abstract+'\n') - for ele in doc_annotation: - fin_pubtator2.write(ele+'\n') - fin_pubtator2.write('\n') - - species_gene_count[doc_pmid]={'spec':_species_num,'gene':_gene_num} - - if fin_pubtator2.getvalue()!='': - #pubtator format ML tagging - #print(fin_pubtator2.getvalue()) - ml_out= sa_tag.ml_tag_main(fin_pubtator2,nlp_token, nn_model) - #print(ml_out.getvalue()) - fin_result=io.StringIO(ml_out.getvalue()) - all_in=fin_result.read().strip().split('\n\n') - #print('+2 species:',len(all_in)) - fin_result.close() - for doc in all_in: - lines=doc.split('\n') - pmid=lines[0].split('|t|')[0] - - for i in range(2,len(lines)): - segs=lines[i].split('\t') - if pmid not in ML_results.keys(): - ML_results[pmid]={segs[2]+'-'+segs[3]:segs[-1]} - else: - ML_results[pmid][segs[2]+'-'+segs[3]]=segs[-1] - - #output - fout_pubtator=open(outpath+"/"+infile,'w', encoding='utf8') - for doc in all_in_ori: - lines=doc.split('\n') - pmid=lines[0].split('|t|')[0] - fout_pubtator.write(lines[0]+'\n'+lines[1]+'\n') - if len(species_gene_count[pmid]['spec'])>1 and species_gene_count[pmid]['gene']>0: # ML - for i in range(2,len(lines)): - segs=lines[i].split('\t') - if len(segs)>=6: #species - fout_pubtator.write(lines[i]+'\n') - else:#gene - if ML_results[pmid][segs[1]+'-'+segs[2]] in virus_set: - fout_pubtator.write(lines[i]+'\tFocus:'+ML_results[pmid][segs[1]+'-'+segs[2]]+',9606'+'\n') - # print('!!! virus:', pmid) - else: - fout_pubtator.write(lines[i]+'\tFocus:'+ML_results[pmid][segs[1]+'-'+segs[2]]+'\n') - fout_pubtator.write('\n') - - elif len(species_gene_count[pmid]['spec'])==1 and species_gene_count[pmid]['gene']>0: #only one species - for i in range(2,len(lines)): - segs=lines[i].split('\t') - if len(segs)>=6: #species - fout_pubtator.write(lines[i]+'\n') - else:#gene - major_species,=species_gene_count[pmid]['spec'] - if major_species[1:] in virus_set: - fout_pubtator.write(lines[i]+'\tFocus:'+major_species[1:]+',9606'+'\n') - # print('!!! virus:', pmid) - fout_pubtator.write(lines[i]+'\tFocus:'+major_species[1:]+'\n') - fout_pubtator.write('\n') - - elif len(species_gene_count[pmid]['spec'])==0 and species_gene_count[pmid]['gene']>0:#no species - for i in range(2,len(lines)): - segs=lines[i].split('\t') - if len(segs)>=6: #species - fout_pubtator.write(lines[i]+'\n') - else:#gene - fout_pubtator.write(lines[i]+'\tFocus:9606'+'\n') - fout_pubtator.write('\n') - - else: - for i in range(2,len(lines)): - fout_pubtator.write(lines[i]+'\n') - fout_pubtator.write('\n') - fout_pubtator.close() - - -#SA main -def speciesAss(infolder,outpath, modelfile): - - if modelfile.lower().find('bioformer')>=0: - model_type='bioformer' - else: - model_type='pubmedbert' - - print('loading SA models........') - if model_type=='bioformer': - - vocabfiles={'labelfile':'./vocab/SpeAss_IO_label.vocab', - 'checkpoint_path':'./gnorm_trained_models/bioformer-cased-v1.0/', - 'lowercase':False, - } - else: - vocabfiles={'labelfile':'./vocab/SpeAss_IO_label.vocab', - 'checkpoint_path':'./gnorm_trained_models/BiomedNLP-PubMedBERT-base-uncased-abstract/', - 'lowercase':True, - } - - nn_model=model_sa.HUGFACE_NER(vocabfiles) - nn_model.build_encoder() - nn_model.build_softmax_decoder() - nn_model.load_model(modelfile) - - dict_filename={'prefix':'./Dictionary/SPPrefix.txt', - 'virus':'./Dictionary/SP_Virus2HumanList.txt'} - fin=open(dict_filename['virus'],'r',encoding='utf-8') - virus_set=set(fin.read().strip().split('\n')) - fin.close() - - prefix_dict={}#{id:[prefix1,prefix2]} - fin=open(dict_filename['prefix'],'r',encoding='utf-8') - for line in fin: - seg= line.strip().split('\t') - if seg[0] not in prefix_dict.keys(): - prefix_dict[seg[0]]=seg[1].split('|') - else: - prefix_dict[seg[0]].extend(seg[1].split('|')) - fin.close() - - - - print("begin species assignment........") - start_time=time.time() - - for infile in os.listdir(infolder): - if os.path.isfile(outpath+"/"+infile): - print(infile+' has exsited.') - else: - print('Processing:',infile) - fin=open(infolder+"/"+infile, 'r',encoding='utf-8') - file_format="" - for line in fin: - pattern_bioc = re.compile('.*.*') - pattern_pubtator = re.compile('^([^\|]+)\|[^\|]+\|(.*)') - if pattern_bioc.search(line): - file_format="BioC" - break - elif pattern_pubtator.search(line): - file_format="PubTator" - break - fin.close() - if(file_format == "PubTator"): - SA_PubTator(infolder,infile,outpath,nn_model,virus_set,prefix_dict) - elif(file_format == "BioC"): - SA_BioC(infolder,infile,outpath,nn_model,virus_set,prefix_dict) - - - print('species assignment done:',time.time()-start_time) - -if __name__=='__main__': - - parser = argparse.ArgumentParser(description='run GeneNER and species assignment, python GeneNER_SpeAss_run.py -i input -n NERmodel -s SAmodel -r neroutput -a saoutput') - parser.add_argument('--infolder', '-i', help="input folder",default='./example/input/') - parser.add_argument('--NERmodel', '-n', help="trained deep learning NER model file",default='') - parser.add_argument('--SAmodel', '-s', help="trained deep learning species assignment model file",default='') - parser.add_argument('--NERoutpath', '-r', help="output folder to save the NER tagged results",default='./example/ner_output/') - parser.add_argument('--SAoutpath', '-a', help="output folder to save the SA tagged results",default='./example/sa_output/') - parser.add_argument('--NUM_THREADS', '-t', help="Number of threads",default='3') - args = parser.parse_args() - - - if args.NUM_THREADS.isdigit() == False: - args.NUM_THREADS='3' - - tf.config.threading.set_inter_op_parallelism_threads(int(args.NUM_THREADS)) - tf.config.threading.set_intra_op_parallelism_threads(int(args.NUM_THREADS)) - - if args.NERmodel!='' and args.SAmodel!='': - - #pipleline - print('==============\n| GeneNER and SpeAss |\n==============') - - #creat output folder - - if args.infolder[-1]!='/': - args.infolder+='/' - if not os.path.exists(args.infolder): - os.makedirs(args.infolder) - - if args.NERoutpath[-1]!='/': - args.NERoutpath+='/' - if not os.path.exists(args.NERoutpath): - os.makedirs(args.NERoutpath) - - if args.SAoutpath[-1]!='/': - args.SAoutpath+='/' - if not os.path.exists(args.SAoutpath): - os.makedirs(args.SAoutpath) - - #1. gene NER, the results are saved in outpath/ner_tmp/ - geneNER(args.infolder,args.NERoutpath, args.NERmodel) - - - #2. species assignment, the results are saved in outpath/sa_tmp/ - speciesAss(args.NERoutpath,args.SAoutpath, args.SAmodel) - - elif args.NERmodel!='' and args.SAmodel=='': - if args.infolder[-1]!='/': - args.infolder+='/' - if not os.path.exists(args.infolder): - os.makedirs(args.infolder) - - # only geneNER - if args.NERoutpath[-1]!='/': - args.NERoutpath+='/' - if not os.path.exists(args.NERoutpath): - os.makedirs(args.NERoutpath) - - print('==============\n| GeneNER |\n==============') - geneNER(args.infolder,args.NERoutpath,args.NERmodel) - - elif args.NERmodel=='' and args.SAmodel!='': - # only speass - if args.SAoutpath[-1]!='/': - args.SAoutpath+='/' - if not os.path.exists(args.SAoutpath): - os.makedirs(args.SAoutpath) - - print('==============\n| SpeAss |\n==============') - speciesAss(args.infolder,args.SAoutpath,args.SAmodel) - else: - print('Please provide models!') - - +# -*- coding: utf-8 -*- +""" +Created on Wed Jun 8 09:26:57 2022 + +@author: luol2 + +Pipeline: first gene NER, then species assignment +input: species NER bioc xml file +output: gene ner and species assignment results bioc xml file +""" +import argparse +import os +import io +import time +import sys +import re +import shutil +from src_python.GeneNER import model_ner,ner_tag +from src_python.SpeAss import model_sa,sa_tag + +import tensorflow as tf + +import bioc +import stanza +nlp_token = stanza.Pipeline(model_dir='gnorm_trained_models/stanza', lang='en', processors={'tokenize': 'spacy'},package='None', download_method=None) #package='craft' ;./gnorm_trained_models/stanza + +def NER_BioC(infolder,infile,outpath,nn_model): + + with open(infolder+"/"+infile, 'r',encoding='utf-8') as fin: + with open(outpath+"/"+infile,'w', encoding='utf8') as fout: + collection = bioc.load(fin) + + Total_n=len(collection.documents) + print('Total number of sub-documents:', Total_n) + pmid_n=0 + for document in collection.documents: + print("Processing:{0}%".format(round(pmid_n * 100 / Total_n)), end="\r") + pmid_n+=1 + # print(document.id) + mention_num_new=0 + for passage in document.passages: + if passage.text!='' and (not passage.text.isspace()) and passage.infons['type']!='ref': # have text and is not ref + passage_offset=passage.offset + tag_result=ner_tag.ML_Tag(passage.text,nn_model,nlp_token) + mention_num=0 + for ele in tag_result: + bioc_note = bioc.BioCAnnotation() + bioc_note.id = str(mention_num) + mention_num+=1 + bioc_note.infons['type'] = ele[2] + start = int(ele[0]) + last = int(ele[1]) + loc = bioc.BioCLocation(offset=str(passage_offset+start), length= str(last-start)) + bioc_note.locations.append(loc) + bioc_note.text = passage.text[start:last] + passage.annotations.append(bioc_note) + #update id + for temp_annotation in passage.annotations: + temp_annotation.id=str(mention_num_new) + mention_num_new+=1 + bioc.dump(collection, fout, pretty_print=True) + +def NER_PubTator(infolder,infile,outpath,nn_model): + with open(infolder+"/"+infile, 'r',encoding='utf-8') as fin: + with open(outpath+"/"+infile,'w', encoding='utf-8') as fout: + title='' + abstract='' + all_text=fin.read().strip().split('\n\n') + Total_n=len(all_text) + print('Total number of sub-documents:', Total_n) + pmid_n=0 + for doc in all_text: + print("Processing:{0}%".format(round(pmid_n * 100 / Total_n)), end="\r") + pmid_n+=1 + lines = doc.split('\n') + seg=lines[0].split('|t|') + pmid=seg[0] + title="" + if len(seg)>1: + title=seg[1] + abstract="" + if len(lines)>1: + seg=lines[1].split('|a|') + abstract=seg[1] + if len(seg)>1: + abstract=seg[1] + + intext=title+' '+abstract + tag_result=ner_tag.ML_Tag(intext,nn_model,nlp_token) + fout.write(doc+'\n') + for ele in tag_result: + ent_start = ele[0] + ent_last = ele[1] + ent_mention = intext[int(ele[0]):int(ele[1])] + ent_type=ele[2] + fout.write(pmid+"\t"+ent_start+"\t"+ent_last+"\t"+ent_mention+"\t"+ent_type+"\n") + fout.write('\n') + title='' + abstract='' + +def geneNER(infolder, outpath, modelfile): + + print('loading NER models........') + + if modelfile.lower().find('bioformer')>=0: + vocabfiles={'labelfile':'./vocab/GeneNER_label.vocab', + 'checkpoint_path':'./gnorm_trained_models/bioformer-cased-v1.0/', #bioformer-cased-v1.0 + 'lowercase':False, + } + else: + vocabfiles={'labelfile':'./vocab/GeneNER_label.vocab', + 'checkpoint_path':'./gnorm_trained_models/BiomedNLP-PubMedBERT-base-uncased-abstract/', + 'lowercase':True, + } + + nn_model=model_ner.HUGFACE_NER(vocabfiles) + nn_model.build_encoder() + nn_model.build_softmax_decoder() + nn_model.load_model(modelfile) + + #tagging text + print("begin GeneNER tagging........") + start_time=time.time() + + for infile in os.listdir(infolder): + if os.path.isfile(outpath+"/"+infile): + print(infile+' has exsited.') + else: + print('processing:',infile) + fin = open(infolder+"/"+infile, 'r',encoding='utf-8') + input_format="" + for line in fin: + pattern_bioc = re.compile('.*.*') + pattern_pubtator = re.compile('^([^\|]+)\|[^\|]+\|(.*)') + if pattern_bioc.search(line): + input_format="BioC" + break + elif pattern_pubtator.search(line): + input_format="PubTator" + break + fin.close() + if(input_format == "PubTator"): + NER_PubTator(infolder,infile,outpath,nn_model) + elif(input_format == "BioC"): + NER_BioC(infolder,infile,outpath,nn_model) + + print('tag done:',time.time()-start_time) + + +#SA for bioc format +def SA_BioC(infolder,infile,outpath,nn_model,virus_set,prefix_dict): + + #BioC xml to pubtator + # pmid|t|text1 + #pmid|a|text2 + #pmid sid eid entity_txt entity_type entity_id (gene is blank) + fin = open(infolder+"/"+infile, 'r',encoding='utf-8') + # fout_pubtator=open(outpath+'tmp/input_xml.pubtator','w', encoding='utf-8') + fin_pubtator0=io.StringIO() #none *species + fin_pubtator1=io.StringIO() #one *species + fin_pubtator2=io.StringIO() #two or more species + collection = bioc.load(fin) + fin.close() + ori_ann_index={} #{'pmid':{'ent.id':'ent_s-ent_e'}} + species_count={} #{pmid:{speid:num}} + gene_set=['Gene','FamilyName'] + final_sa_results={} #{'pmid':{'entity_id':species_id}} + for document in collection.documents: + doc_pmid=document.id + doc_title='' + doc_abstract='' + doc_annotation=[] + _ann_index={} + _species_num={} #{*speciesid:num} + _gene_num=0 + _passage_num=0 + if len(document.passages)<=2: #abstract xml or PMC only have title + for passage in document.passages: + passage_offset=passage.offset + _passage_num+=1 + #print(passage_offset,type(passage_offset)) + #if passage.infons['type']=='title' or passage.infons['type']=='front': + if _passage_num==1: + doc_title=passage.text + for temp_annotation in passage.annotations: + if temp_annotation.infons['type'] in gene_set: + _gene_num+=1 + ent_start=temp_annotation.locations[0].offset-passage_offset + ent_end=ent_start+temp_annotation.locations[0].length + #print(ent_start,ent_end) + _ann_index[temp_annotation.id]=str(ent_start)+'-'+str(ent_end) + # print(temp_annotation.infons) + if 'Identifier' in temp_annotation.infons.keys(): + # print(temp_annotation.infons.keys['Identifier']) + species_ID=temp_annotation.infons['Identifier'] + if species_ID.find('*')>=0: + if species_ID not in _species_num.keys(): + _species_num[species_ID]=1 + else: + _species_num[species_ID]+=1 + doc_annotation.append(doc_pmid+'\t'+temp_annotation.id+'\t'+str(ent_start)+'\t'+str(ent_end)+'\t'+temp_annotation.text+'\t'+temp_annotation.infons['type']+'\t'+species_ID) + else: + doc_annotation.append(doc_pmid+'\t'+temp_annotation.id+'\t'+str(ent_start)+'\t'+str(ent_end)+'\t'+temp_annotation.text+'\t'+temp_annotation.infons['type']) + + #elif passage.infons['type']=='abstract' or passage.infons['type']=='paragraph': + else: + doc_abstract=passage.text + for temp_annotation in passage.annotations: + if temp_annotation.infons['type'] in gene_set: + _gene_num+=1 + ent_start=len(doc_title)+1+temp_annotation.locations[0].offset-passage_offset + ent_end=ent_start+temp_annotation.locations[0].length + #print(ent_start,ent_end) + _ann_index[temp_annotation.id]=str(ent_start)+'-'+str(ent_end) + if 'Identifier' in temp_annotation.infons.keys(): + # print(temp_annotation.infons.keys['Identifier']) + species_ID=temp_annotation.infons['Identifier'] + if species_ID.find('*')>=0: + if species_ID not in _species_num.keys(): + _species_num[species_ID]=1 + else: + _species_num[species_ID]+=1 + doc_annotation.append(doc_pmid+'\t'+temp_annotation.id+'\t'+str(ent_start)+'\t'+str(ent_end)+'\t'+temp_annotation.text+'\t'+temp_annotation.infons['type']+'\t'+species_ID) + else: + doc_annotation.append(doc_pmid+'\t'+temp_annotation.id+'\t'+str(ent_start)+'\t'+str(ent_end)+'\t'+temp_annotation.text+'\t'+temp_annotation.infons['type']) + + if len(_species_num)>=2 and _gene_num>0: + fin_pubtator2.write(doc_pmid+'|t|'+doc_title+'\n') + fin_pubtator2.write(doc_pmid+'|a|'+doc_abstract+'\n') + for ele in doc_annotation: + fin_pubtator2.write(ele+'\n') + fin_pubtator2.write('\n') + elif len(_species_num)==1 and _gene_num>0: #可以直接给结果 + fin_pubtator1.write(doc_pmid+'|t|'+doc_title+'\n') + fin_pubtator1.write(doc_pmid+'|a|'+doc_abstract+'\n') + major_speicesid,=_species_num + fin_pubtator1.write(major_speicesid[1:]+'\n') + for ele in doc_annotation: + fin_pubtator1.write(ele+'\n') + fin_pubtator1.write('\n') + elif len(_species_num)==0 and _gene_num>0: + fin_pubtator0.write(doc_pmid+'|t|'+doc_title+'\n') + fin_pubtator0.write(doc_pmid+'|a|'+doc_abstract+'\n') + for ele in doc_annotation: + fin_pubtator0.write(ele+'\n') + fin_pubtator0.write('\n') + + else: # full text xml + for passage in document.passages: + passage_annotation=[] + _species_num_passage={} + _gene_num_passage=0 + passage_offset=passage.offset + #print(passage_offset,type(passage_offset)) + if passage.text!='' and (not passage.text.isspace()) and passage.infons['type']!='ref': + doc_title=passage.text + for temp_annotation in passage.annotations: + if temp_annotation.infons['type'] in gene_set: + _gene_num_passage+=1 + ent_start=temp_annotation.locations[0].offset-passage_offset + ent_end=ent_start+temp_annotation.locations[0].length + #print(ent_start,ent_end) + _ann_index[temp_annotation.id]=str(ent_start)+'-'+str(ent_end) + # print(temp_annotation.infons) + if 'Identifier' in temp_annotation.infons.keys(): + # print(temp_annotation.infons.keys['Identifier']) + species_ID=temp_annotation.infons['Identifier'] + if species_ID.find('*')>=0: + if species_ID not in _species_num.keys(): + _species_num[species_ID]=1 + else: + _species_num[species_ID]+=1 + if species_ID not in _species_num_passage.keys(): + _species_num_passage[species_ID]=1 + else: + _species_num_passage[species_ID]+=1 + passage_annotation.append(doc_pmid+'\t'+temp_annotation.id+'\t'+str(ent_start)+'\t'+str(ent_end)+'\t'+temp_annotation.text+'\t'+temp_annotation.infons['type']+'\t'+species_ID) + else: + passage_annotation.append(doc_pmid+'\t'+temp_annotation.id+'\t'+str(ent_start)+'\t'+str(ent_end)+'\t'+temp_annotation.text+'\t'+temp_annotation.infons['type']) + + + if len(_species_num_passage)>=2 and _gene_num_passage>0: + fin_pubtator2.write(doc_pmid+'|t|'+doc_title+'\n') + fin_pubtator2.write(doc_pmid+'|a|'+doc_abstract+'\n') + for ele in passage_annotation: + fin_pubtator2.write(ele+'\n') + fin_pubtator2.write('\n') + elif len(_species_num_passage)==1 and _gene_num_passage>0: #可以直接给结果 + fin_pubtator1.write(doc_pmid+'|t|'+doc_title+'\n') + fin_pubtator1.write(doc_pmid+'|a|'+doc_abstract+'\n') + major_speicesid,=_species_num_passage + fin_pubtator1.write(major_speicesid[1:]+'\n') + for ele in passage_annotation: + fin_pubtator1.write(ele+'\n') + fin_pubtator1.write('\n') + elif len(_species_num_passage)==0 and _gene_num_passage>0: + fin_pubtator0.write(doc_pmid+'|t|'+doc_title+'\n') + fin_pubtator0.write(doc_pmid+'|a|'+doc_abstract+'\n') + for ele in passage_annotation: + fin_pubtator0.write(ele+'\n') + fin_pubtator0.write('\n') + # print(ori_ann_index) + + ori_ann_index[doc_pmid]=_ann_index + species_count[doc_pmid]=_species_num + + + cache_geneid={} #{pmid:{gene1:{id1:num,id2:num}}} + + if fin_pubtator2.getvalue()!='': + #pubtator format ML tagging + # print(fin_pubtator2.getvalue()) + ml_out= sa_tag.ml_tag_main(fin_pubtator2,nlp_token, nn_model) + #print(ml_out.getvalue()) + fin_result=io.StringIO(ml_out.getvalue()) + all_in=fin_result.read().strip().split('\n\n') + #print('+2 species:',len(all_in)) + fin_result.close() + + prefix_speid_allset=set(prefix_dict.keys()) + + for doc in all_in: + lines=doc.split('\n') + pmid=lines[0].split('|t|')[0] + _prefix_str2id_dict={} + doc_species=list(species_count[pmid].keys()) + for _spe_ele in doc_species: + if _spe_ele[1:] in prefix_speid_allset: + for ele in prefix_dict[_spe_ele[1:]]: + _prefix_str2id_dict[ele]=_spe_ele[1:] + + for i in range(2,len(lines)): + segs=lines[i].split('\t') + if pmid not in final_sa_results.keys(): + final_sa_results[pmid]={segs[1]:'Focus:'+segs[-1]} + else: + final_sa_results[pmid][segs[1]]='Focus:'+segs[-1] + + if segs[5] in gene_set: + if segs[4][0:2] in _prefix_str2id_dict: #prefix rule + #print('prefix rule:', pmid) + # print(_prefix_str2id_dict) + if pmid not in final_sa_results.keys(): + final_sa_results[pmid]={segs[1]:'Focus:'+_prefix_str2id_dict[segs[4][0:2]]} + else: + final_sa_results[pmid][segs[1]]='Focus:'+_prefix_str2id_dict[segs[4][0:2]] + if pmid not in cache_geneid.keys(): + cache_geneid[pmid]={segs[4]:{'Focus:'+segs[-1]:1}} + else: + if segs[4] not in cache_geneid[pmid].keys(): + cache_geneid[pmid][segs[4]]={'Focus:'+segs[-1]:1} + else: + if segs[-1] not in cache_geneid[pmid][segs[4]].keys(): + cache_geneid[pmid][segs[4]]['Focus:'+segs[-1]]=1 + else: + cache_geneid[pmid][segs[4]]['Focus:'+segs[-1]]+=1 + + #print(final_sa_results) + + #one species + if fin_pubtator1.getvalue()!='': + fin_result=io.StringIO(fin_pubtator1.getvalue()) + all_in=fin_result.read().strip().split('\n\n') + fin_result.close() + #print('1 species:',len(all_in)) + for doc in all_in: + lines=doc.split('\n') + pmid=lines[0].split('|t|')[0] + major_speicesid=lines[2] + for i in range(3,len(lines)): + segs=lines[i].split('\t') + if len(segs)>=7:#species + if pmid not in final_sa_results.keys(): + final_sa_results[pmid]={segs[1]:segs[-1]} + else: + final_sa_results[pmid][segs[1]]=segs[-1] + else:#gene + marjor_species='Focus:'+major_speicesid + if pmid not in final_sa_results.keys(): + final_sa_results[pmid]={segs[1]:marjor_species} + else: + final_sa_results[pmid][segs[1]]=marjor_species + if pmid not in cache_geneid.keys(): + cache_geneid[pmid]={segs[4]:{marjor_species:1}} + else: + if segs[4] not in cache_geneid[pmid].keys(): + cache_geneid[pmid][segs[4]]={marjor_species:1} + else: + if segs[-1] not in cache_geneid[pmid][segs[4]].keys(): + cache_geneid[pmid][segs[4]][marjor_species]=1 + else: + cache_geneid[pmid][segs[4]][marjor_species]+=1 + + + #no species + fin_result=io.StringIO(fin_pubtator0.getvalue()) + all_in=fin_result.read().strip().split('\n\n') + fin_result.close() + #print('no species:',len(all_in)) + for doc in all_in: + lines=doc.split('\n') + pmid=lines[0].split('|t|')[0] + + for i in range(2,len(lines)): + segs=lines[i].split('\t') + if (pmid in cache_geneid.keys()) and (segs[4] in cache_geneid[pmid].keys()):#same gene in doc + marjor_species = max(zip(cache_geneid[pmid][segs[4]].values(), cache_geneid[pmid][segs[4]].keys())) + if pmid not in final_sa_results.keys(): + final_sa_results[pmid]={segs[1]:marjor_species[1]} + else: + final_sa_results[pmid][segs[1]]=marjor_species[1] + else: #marjor species in doc + if (pmid in species_count.keys()) and len(species_count[pmid])>0:#marjor species in doc + marjor_species = max(zip(species_count[pmid].values(), species_count[pmid].keys())) + + if pmid not in final_sa_results.keys(): + final_sa_results[pmid]={segs[1]:'Focus:'+marjor_species[1][1:]} + else: + final_sa_results[pmid][segs[1]]='Focus:'+marjor_species[1][1:] + else:#no any species in doc,assign human + if pmid not in final_sa_results.keys(): + final_sa_results[pmid]={segs[1]:'Focus:9606'} + else: + final_sa_results[pmid][segs[1]]='Focus:9606' + + + + # print(final_sa_results) + fin = open(infolder+"/"+infile, 'r',encoding='utf-8') + fout_xml=open(outpath+"/"+infile,'w', encoding='utf8') + collection = bioc.load(fin) + for document in collection.documents: + doc_pmid=document.id + # print(final_sa_results[doc_pmid]) + # print(doc_pmid) + for passage in document.passages: + for temp_annotation in passage.annotations: + if 'Identifier' not in temp_annotation.infons.keys(): + if temp_annotation.id in final_sa_results[doc_pmid].keys(): + if final_sa_results[doc_pmid][temp_annotation.id][6:] in virus_set: + temp_annotation.infons['Identifier']=final_sa_results[doc_pmid][temp_annotation.id]+',9606' + # print('!!! virus:', doc_pmid) + else: + temp_annotation.infons['Identifier']=final_sa_results[doc_pmid][temp_annotation.id] + else: #same text bug + if (doc_pmid in cache_geneid.keys()) and (temp_annotation.text in cache_geneid[doc_pmid].keys()):#same gene in doc + marjor_species = max(zip(cache_geneid[doc_pmid][temp_annotation.text].values(), cache_geneid[doc_pmid][temp_annotation.text].keys())) + temp_annotation.infons['Identifier']=marjor_species[1] + else: + + temp_annotation.infons['Identifier']='Focus:9606' + bioc.dump(collection, fout_xml, pretty_print=True) + fin.close() + fout_xml.close() + + +#SA for PubTator format +def SA_PubTator(infolder,infile,outpath,nn_model,virus_set,prefix_dict): + + + # pmid|t|text1 + #pmid|a|text2 + #pmid entity_id sid eid entity_txt entity_type (gene is blank) + fin = open(infolder+"/"+infile, 'r',encoding='utf-8') + # fout_pubtator=open(outpath+'tmp/input_xml.pubtator','w', encoding='utf-8') + fin_pubtator2=io.StringIO() #two or more species + all_in_ori=fin.read().strip().split('\n\n') + fin.close() + species_gene_count={} #{pmid:{'spec':_species_num;'gene':_gene_num}} + gene_set=['Gene','FamilyName'] + ML_results={} #{'pmid':{'sid-eid':species_id}} + + prefix_speid_allset=set(prefix_dict.keys()) + + for document in all_in_ori: + lines=document.split('\n') + doc_pmid=lines[0].split('|t|')[0] + doc_title=lines[0].split('|t|')[1] + doc_abstract=lines[1].split('|a|')[1] + doc_annotation=[] + _species_num=set() #(*speciesid) + _gene_num=0 + _ML_gene_num=0 + _entity_num=0 + _prefix_str2id_dict={} #{prestr:id} + for i in range(2,len(lines)): + segs=lines[i].split('\t') + if segs[4] in gene_set: + _gene_num+=1 + if len(segs)>=6: #species + doc_annotation.append(segs[0]+'\t'+str(_entity_num)+'\t'+'\t'.join(segs[1:])) + species_ID=segs[-1] + if species_ID.find('*')>=0: + _species_num.add(species_ID) + if species_ID[1:] in prefix_speid_allset: + for ele in prefix_dict[species_ID[1:]]: + _prefix_str2id_dict[ele]=species_ID[1:] + else: #gene + if segs[3][0:2] in _prefix_str2id_dict:#prefix rule + if _prefix_str2id_dict[segs[3][0:2]] in virus_set: + doc_annotation.append(segs[0]+'\t'+str(_entity_num)+'\t'+'\t'.join(segs[1:])+'\tFocus:'+_prefix_str2id_dict[segs[3][0:2]]+',9606') + if doc_pmid not in ML_results.keys(): + ML_results[doc_pmid]={segs[1]+'-'+segs[2]:_prefix_str2id_dict[segs[3][0:2]]+',9606'} + else: + ML_results[doc_pmid][segs[1]+'-'+segs[2]]=_prefix_str2id_dict[segs[3][0:2]]+',9606' + + # print('!!! prefixr and virus:', doc_pmid) + else: + doc_annotation.append(segs[0]+'\t'+str(_entity_num)+'\t'+'\t'.join(segs[1:])+'\tFocus:'+_prefix_str2id_dict[segs[3][0:2]]) + if doc_pmid not in ML_results.keys(): + ML_results[doc_pmid]={segs[1]+'-'+segs[2]:_prefix_str2id_dict[segs[3][0:2]]} + else: + ML_results[doc_pmid][segs[1]+'-'+segs[2]]=_prefix_str2id_dict[segs[3][0:2]] + # print('prefix rule!!',_prefix_str2id_dict) + # print(doc_pmid) + else: + doc_annotation.append(segs[0]+'\t'+str(_entity_num)+'\t'+'\t'.join(segs[1:])) + if segs[4] in gene_set: + _ML_gene_num+=1 + _entity_num+=1 + + if len(_species_num)>=2 and _ML_gene_num>0: + fin_pubtator2.write(doc_pmid+'|t|'+doc_title+'\n') + fin_pubtator2.write(doc_pmid+'|a|'+doc_abstract+'\n') + for ele in doc_annotation: + fin_pubtator2.write(ele+'\n') + fin_pubtator2.write('\n') + + species_gene_count[doc_pmid]={'spec':_species_num,'gene':_gene_num} + + if fin_pubtator2.getvalue()!='': + #pubtator format ML tagging + #print(fin_pubtator2.getvalue()) + ml_out= sa_tag.ml_tag_main(fin_pubtator2,nlp_token, nn_model) + #print(ml_out.getvalue()) + fin_result=io.StringIO(ml_out.getvalue()) + all_in=fin_result.read().strip().split('\n\n') + #print('+2 species:',len(all_in)) + fin_result.close() + for doc in all_in: + lines=doc.split('\n') + pmid=lines[0].split('|t|')[0] + + for i in range(2,len(lines)): + segs=lines[i].split('\t') + if pmid not in ML_results.keys(): + ML_results[pmid]={segs[2]+'-'+segs[3]:segs[-1]} + else: + ML_results[pmid][segs[2]+'-'+segs[3]]=segs[-1] + + #output + fout_pubtator=open(outpath+"/"+infile,'w', encoding='utf8') + for doc in all_in_ori: + lines=doc.split('\n') + pmid=lines[0].split('|t|')[0] + fout_pubtator.write(lines[0]+'\n'+lines[1]+'\n') + if len(species_gene_count[pmid]['spec'])>1 and species_gene_count[pmid]['gene']>0: # ML + for i in range(2,len(lines)): + segs=lines[i].split('\t') + if len(segs)>=6: #species + fout_pubtator.write(lines[i]+'\n') + else:#gene + if ML_results[pmid][segs[1]+'-'+segs[2]] in virus_set: + fout_pubtator.write(lines[i]+'\tFocus:'+ML_results[pmid][segs[1]+'-'+segs[2]]+',9606'+'\n') + # print('!!! virus:', pmid) + else: + fout_pubtator.write(lines[i]+'\tFocus:'+ML_results[pmid][segs[1]+'-'+segs[2]]+'\n') + fout_pubtator.write('\n') + + elif len(species_gene_count[pmid]['spec'])==1 and species_gene_count[pmid]['gene']>0: #only one species + for i in range(2,len(lines)): + segs=lines[i].split('\t') + if len(segs)>=6: #species + fout_pubtator.write(lines[i]+'\n') + else:#gene + major_species,=species_gene_count[pmid]['spec'] + if major_species[1:] in virus_set: + fout_pubtator.write(lines[i]+'\tFocus:'+major_species[1:]+',9606'+'\n') + # print('!!! virus:', pmid) + fout_pubtator.write(lines[i]+'\tFocus:'+major_species[1:]+'\n') + fout_pubtator.write('\n') + + elif len(species_gene_count[pmid]['spec'])==0 and species_gene_count[pmid]['gene']>0:#no species + for i in range(2,len(lines)): + segs=lines[i].split('\t') + if len(segs)>=6: #species + fout_pubtator.write(lines[i]+'\n') + else:#gene + fout_pubtator.write(lines[i]+'\tFocus:9606'+'\n') + fout_pubtator.write('\n') + + else: + for i in range(2,len(lines)): + fout_pubtator.write(lines[i]+'\n') + fout_pubtator.write('\n') + fout_pubtator.close() + + +#SA main +def speciesAss(infolder,outpath, modelfile): + + if modelfile.lower().find('bioformer')>=0: + model_type='bioformer' + else: + model_type='pubmedbert' + + print('loading SA models........') + if model_type=='bioformer': + + vocabfiles={'labelfile':'./vocab/SpeAss_IO_label.vocab', + 'checkpoint_path':'./gnorm_trained_models/bioformer-cased-v1.0/', + 'lowercase':False, + } + else: + vocabfiles={'labelfile':'./vocab/SpeAss_IO_label.vocab', + 'checkpoint_path':'./gnorm_trained_models/BiomedNLP-PubMedBERT-base-uncased-abstract/', + 'lowercase':True, + } + + nn_model=model_sa.HUGFACE_NER(vocabfiles) + nn_model.build_encoder() + nn_model.build_softmax_decoder() + nn_model.load_model(modelfile) + + dict_filename={'prefix':'./Dictionary/SPPrefix.txt', + 'virus':'./Dictionary/SP_Virus2HumanList.txt'} + fin=open(dict_filename['virus'],'r',encoding='utf-8') + virus_set=set(fin.read().strip().split('\n')) + fin.close() + + prefix_dict={}#{id:[prefix1,prefix2]} + fin=open(dict_filename['prefix'],'r',encoding='utf-8') + for line in fin: + seg= line.strip().split('\t') + if seg[0] not in prefix_dict.keys(): + prefix_dict[seg[0]]=seg[1].split('|') + else: + prefix_dict[seg[0]].extend(seg[1].split('|')) + fin.close() + + + + print("begin species assignment........") + start_time=time.time() + + for infile in os.listdir(infolder): + if os.path.isfile(outpath+"/"+infile): + print(infile+' has exsited.') + else: + print('Processing:',infile) + fin=open(infolder+"/"+infile, 'r',encoding='utf-8') + file_format="" + for line in fin: + pattern_bioc = re.compile('.*.*') + pattern_pubtator = re.compile('^([^\|]+)\|[^\|]+\|(.*)') + if pattern_bioc.search(line): + file_format="BioC" + break + elif pattern_pubtator.search(line): + file_format="PubTator" + break + fin.close() + if(file_format == "PubTator"): + SA_PubTator(infolder,infile,outpath,nn_model,virus_set,prefix_dict) + elif(file_format == "BioC"): + SA_BioC(infolder,infile,outpath,nn_model,virus_set,prefix_dict) + + + print('species assignment done:',time.time()-start_time) + +if __name__=='__main__': + + parser = argparse.ArgumentParser(description='run GeneNER and species assignment, python GeneNER_SpeAss_run.py -i input -n NERmodel -s SAmodel -r neroutput -a saoutput') + parser.add_argument('--infolder', '-i', help="input folder",default='./example/input/') + parser.add_argument('--NERmodel', '-n', help="trained deep learning NER model file",default='') + parser.add_argument('--SAmodel', '-s', help="trained deep learning species assignment model file",default='') + parser.add_argument('--NERoutpath', '-r', help="output folder to save the NER tagged results",default='./example/ner_output/') + parser.add_argument('--SAoutpath', '-a', help="output folder to save the SA tagged results",default='./example/sa_output/') + parser.add_argument('--NUM_THREADS', '-t', help="Number of threads",default='3') + args = parser.parse_args() + + + if args.NUM_THREADS.isdigit() == False: + args.NUM_THREADS='3' + + tf.config.threading.set_inter_op_parallelism_threads(int(args.NUM_THREADS)) + tf.config.threading.set_intra_op_parallelism_threads(int(args.NUM_THREADS)) + + if args.NERmodel!='' and args.SAmodel!='': + + #pipleline + print('==============\n| GeneNER and SpeAss |\n==============') + + #creat output folder + + if args.infolder[-1]!='/': + args.infolder+='/' + if not os.path.exists(args.infolder): + os.makedirs(args.infolder) + + if args.NERoutpath[-1]!='/': + args.NERoutpath+='/' + if not os.path.exists(args.NERoutpath): + os.makedirs(args.NERoutpath) + + if args.SAoutpath[-1]!='/': + args.SAoutpath+='/' + if not os.path.exists(args.SAoutpath): + os.makedirs(args.SAoutpath) + + #1. gene NER, the results are saved in outpath/ner_tmp/ + geneNER(args.infolder,args.NERoutpath, args.NERmodel) + + + #2. species assignment, the results are saved in outpath/sa_tmp/ + speciesAss(args.NERoutpath,args.SAoutpath, args.SAmodel) + + elif args.NERmodel!='' and args.SAmodel=='': + if args.infolder[-1]!='/': + args.infolder+='/' + if not os.path.exists(args.infolder): + os.makedirs(args.infolder) + + # only geneNER + if args.NERoutpath[-1]!='/': + args.NERoutpath+='/' + if not os.path.exists(args.NERoutpath): + os.makedirs(args.NERoutpath) + + print('==============\n| GeneNER |\n==============') + geneNER(args.infolder,args.NERoutpath,args.NERmodel) + + elif args.NERmodel=='' and args.SAmodel!='': + # only speass + if args.SAoutpath[-1]!='/': + args.SAoutpath+='/' + if not os.path.exists(args.SAoutpath): + os.makedirs(args.SAoutpath) + + print('==============\n| SpeAss |\n==============') + speciesAss(args.infolder,args.SAoutpath,args.SAmodel) + else: + print('Please provide models!') + + \ No newline at end of file diff --git a/Library/Ab3P.C b/Library/Ab3P.C index bd811b8907b48441af337dda13edd5091c3b0737..a6b93029de6d5050c016f206c2f7a55bc3cbddda 100644 --- a/Library/Ab3P.C +++ b/Library/Ab3P.C @@ -1,110 +1,110 @@ -#include "Ab3P.h" - -Ab3P::Ab3P ( void ) : - buffer(""), - wrdData( new WordData ) -{ - - string sf_grp, sf_nchr, strat; - double value; - - char file_name[1000]; - get_pathw( file_name, "Ab3P", "prec", "dat" ); - ifstream fin(file_name); - if(!fin) { - cout << "Cannot open Ab3P_prec.dat\n"; - exit(1); - } - //get precision of a given #-ch SF's strategy - while(fin>>sf_grp>>sf_nchr>>strat) { - fin>>value; //precision - stratPrec.insert(pair(sf_grp+sf_nchr+strat, value)); - util.push_back_strat(sf_grp+sf_nchr, strat); //set strategy sequence - } -} - -void Ab3P::get_abbrs( char * text, vector & abbrs ) { - abbrs.clear(); - - if( ! text[0] ) return; // skip empty line - - ab.Proc(text); //extract potential SF & LF pairs - - for(int i=0; i 0 ) { - abbrs.push_back( result ); - } - } - ab.cleara(); - - } - - -void Ab3P::try_pair( char * sf, char * lf, AbbrOut & result ) { - - //process i) lf (sf) - try_strats( sf, lf, false, result ); - - //process ii) sf (lf) - ab.token(lf); - try_strats( ab.lst[ab.num-1], sf, true, result ); -} - - - /** - psf -- pointer short form - plf -- pointer long form - **/ -void Ab3P::try_strats ( char * psf, char * plf, bool swap, - AbbrOut & result ) { - - string sfg; //SF group eg) Al1, Num2, Spec3 - //false if sf is not ok, sfg will be assigned - - if(!util.group_sf(psf,plf,sfg)) return; - if (swap) if(!util.exist_upperal(psf)) return; - - char sf[1000], sfl[1000]; - - //strategy sequence for a given #-ch SF group - vector strats = util.get_strats(sfg); - util.remove_nonAlnum(psf,sf); //sf will be w/o non-alnum - - //go through strategies - for( int j=0; jwData = wrdData; //set wordset, stopword - if(strat->strategy(sf,plf)) { //case sensitive - strat->str_tolower(sf,sfl); - - if( strat->lf_ok(psf,strat->lf) ) { - - map::iterator p = - stratPrec.find(sfg+strats[j]); - if(p==stratPrec.end()) { - cout << "No precision assigned" << endl; - exit(1); - } - - //add outputs - if( p->second>result.prec ) { - result.sf = psf; - result.lf = strat->lf; - result.prec = p->second; - result.strat = strats[j]; - } - - delete strat; - return; - } - } - delete strat; - } - -} +#include "Ab3P.h" + +Ab3P::Ab3P ( void ) : + buffer(""), + wrdData( new WordData ) +{ + + string sf_grp, sf_nchr, strat; + double value; + + char file_name[1000]; + get_pathw( file_name, "Ab3P", "prec", "dat" ); + ifstream fin(file_name); + if(!fin) { + cout << "Cannot open Ab3P_prec.dat\n"; + exit(1); + } + //get precision of a given #-ch SF's strategy + while(fin>>sf_grp>>sf_nchr>>strat) { + fin>>value; //precision + stratPrec.insert(pair(sf_grp+sf_nchr+strat, value)); + util.push_back_strat(sf_grp+sf_nchr, strat); //set strategy sequence + } +} + +void Ab3P::get_abbrs( char * text, vector & abbrs ) { + abbrs.clear(); + + if( ! text[0] ) return; // skip empty line + + ab.Proc(text); //extract potential SF & LF pairs + + for(int i=0; i 0 ) { + abbrs.push_back( result ); + } + } + ab.cleara(); + + } + + +void Ab3P::try_pair( char * sf, char * lf, AbbrOut & result ) { + + //process i) lf (sf) + try_strats( sf, lf, false, result ); + + //process ii) sf (lf) + ab.token(lf); + try_strats( ab.lst[ab.num-1], sf, true, result ); +} + + + /** + psf -- pointer short form + plf -- pointer long form + **/ +void Ab3P::try_strats ( char * psf, char * plf, bool swap, + AbbrOut & result ) { + + string sfg; //SF group eg) Al1, Num2, Spec3 + //false if sf is not ok, sfg will be assigned + + if(!util.group_sf(psf,plf,sfg)) return; + if (swap) if(!util.exist_upperal(psf)) return; + + char sf[1000], sfl[1000]; + + //strategy sequence for a given #-ch SF group + vector strats = util.get_strats(sfg); + util.remove_nonAlnum(psf,sf); //sf will be w/o non-alnum + + //go through strategies + for( int j=0; jwData = wrdData; //set wordset, stopword + if(strat->strategy(sf,plf)) { //case sensitive + strat->str_tolower(sf,sfl); + + if( strat->lf_ok(psf,strat->lf) ) { + + map::iterator p = + stratPrec.find(sfg+strats[j]); + if(p==stratPrec.end()) { + cout << "No precision assigned" << endl; + exit(1); + } + + //add outputs + if( p->second>result.prec ) { + result.sf = psf; + result.lf = strat->lf; + result.prec = p->second; + result.strat = strats[j]; + } + + delete strat; + return; + } + } + delete strat; + } + +} diff --git a/Library/Ab3P.h b/Library/Ab3P.h index 60e2c7cac33a4512dba9812db2164bccd4be99cd..4a9aa8ed867667ece1a38f80000d1dd7b32a27a3 100644 --- a/Library/Ab3P.h +++ b/Library/Ab3P.h @@ -1,83 +1,83 @@ -/* -Identify sf & lf pairs from free text using multi-stage algorithm -process one line at a time and print out: -line - sf|lf|P-precision|strategy -*/ - -#include "AbbrvE.h" -#include "AbbrStra.h" -#include -#include -#include - -using namespace std; -using namespace iret; - -namespace iret { - -class AbbrOut { -public: - string sf, lf, strat; - double prec; - - AbbrOut( void ) : sf(""), lf(""), strat(""), prec(0) - {} - - void print ( ostream & out ) { - out << " " << sf << "|" << lf << "|" << prec; - } - -}; - - -class Ab3P { -public: - Ab3P( void ); - ~Ab3P(void) { delete wrdData; } - - /** Collect text for later abbreviation finding. **/ - void add_text( const string & text ) { - buffer += text; - } - void add_text( char * text ) { - buffer += text; - } - - /** Sets abbrs to the abbreviations found in previous calls to add_text. - Afterwords, resets the text buffer. **/ - void get_abbrs( vector & abbrs ) { - get_abbrs( buffer, abbrs ); - buffer = ""; - } - - /** Sets abbrs to the abbreviations found in text - Does not interfere with the add_text buffer. **/ - void get_abbrs( const string & text, vector & abbrs ) { - abbrs.clear(); - - if(text.empty()) return; // skip empty line - // const_cast need so correct get_abbrs get called, - // otherwise, infinite loop - get_abbrs( const_cast(text.c_str()), abbrs ); - } - void get_abbrs( char * text, vector & abbrs ); - - /** Try a potential sf-lf form to find proper lf, strategy used, - and pseudo-precision of result **/ - void try_pair( char * sf, char * lf, AbbrOut & abbr ); - - /** - psf -- pointer short form - plf -- pointer long form - **/ - void try_strats ( char * psf, char * plf, bool swap, AbbrOut & result ); - - AbbrvE ab; //default # pairs = 10,000 - map stratPrec; - StratUtil util; - WordData *wrdData; //set data needed for AbbrStra - string buffer; // collect text for later use -}; - -} +/* +Identify sf & lf pairs from free text using multi-stage algorithm +process one line at a time and print out: +line + sf|lf|P-precision|strategy +*/ + +#include "AbbrvE.h" +#include "AbbrStra.h" +#include +#include +#include + +using namespace std; +using namespace iret; + +namespace iret { + +class AbbrOut { +public: + string sf, lf, strat; + double prec; + + AbbrOut( void ) : sf(""), lf(""), strat(""), prec(0) + {} + + void print ( ostream & out ) { + out << " " << sf << "|" << lf << "|" << prec; + } + +}; + + +class Ab3P { +public: + Ab3P( void ); + ~Ab3P(void) { delete wrdData; } + + /** Collect text for later abbreviation finding. **/ + void add_text( const string & text ) { + buffer += text; + } + void add_text( char * text ) { + buffer += text; + } + + /** Sets abbrs to the abbreviations found in previous calls to add_text. + Afterwords, resets the text buffer. **/ + void get_abbrs( vector & abbrs ) { + get_abbrs( buffer, abbrs ); + buffer = ""; + } + + /** Sets abbrs to the abbreviations found in text + Does not interfere with the add_text buffer. **/ + void get_abbrs( const string & text, vector & abbrs ) { + abbrs.clear(); + + if(text.empty()) return; // skip empty line + // const_cast need so correct get_abbrs get called, + // otherwise, infinite loop + get_abbrs( const_cast(text.c_str()), abbrs ); + } + void get_abbrs( char * text, vector & abbrs ); + + /** Try a potential sf-lf form to find proper lf, strategy used, + and pseudo-precision of result **/ + void try_pair( char * sf, char * lf, AbbrOut & abbr ); + + /** + psf -- pointer short form + plf -- pointer long form + **/ + void try_strats ( char * psf, char * plf, bool swap, AbbrOut & result ); + + AbbrvE ab; //default # pairs = 10,000 + map stratPrec; + StratUtil util; + WordData *wrdData; //set data needed for AbbrStra + string buffer; // collect text for later use +}; + +} diff --git a/Library/AbbrStra.C b/Library/AbbrStra.C index 3f9c5c7274e714a674332b50ca68010349a8d04d..301e87930629a0a03a2a3eae20401f3827842772 100644 --- a/Library/AbbrStra.C +++ b/Library/AbbrStra.C @@ -1,1426 +1,1426 @@ -#include "AbbrStra.h" -#include -#include -#include -#include - - -WordData::WordData(const char *wrdnam, const char *stpnam, - const char *lfsnam) : - wrdset(wrdnam), stp(stpnam), lfs(lfsnam) -{ - wrdset.set_path_name("Ab3P"); - wrdset.gopen_ctable_map(); - stp.set_path_name("Ab3P"); - stp.gopen_htable_map(); - lfs.set_path_name("Ab3P"); - lfs.gopen_htable_map(); -} - -WordData::~WordData() -{ - wrdset.gclose_ctable_map(); - stp.gclose_htable_map(); - lfs.gclose_htable_map(); -} - - -AbbrStra::AbbrStra() -{ - npairs = tpairs = nsfs = nmatchs = amatchs = 0; -} - - -AbbrStra::~AbbrStra() -{ -} - - -void AbbrStra::token(const char *str, char lst[1000][1000]) -{ - long i,j=0,k=0; - long n=strlen(str)-1; - - while(isblank(str[n])) n--; - - while(str[j]){ - while(isblank(str[j]))j++; - i=j; - while((str[j])&&(!isblank(str[j])))j++; - strncpy(lst[k],str+i,j-i); - lst[k][j-i]='\0'; - if(str[j]){ - k++; - j++; - } - } - if((j-1)>n) k--; //added by Sohn (Jan-17-08): "ab cd " -> 2 tokens - ntk=k+1; //# tokens, ntk is data member -} - - -long AbbrStra::tokenize(const char *str, char lst[1000][1000]) -{ - long i,j=0,k=0; - long n=strlen(str)-1; - - while(isblank(str[n])) n--; - - while(str[j]){ - while(isblank(str[j]))j++; - i=j; - while((str[j])&&(!isblank(str[j])))j++; - strncpy(lst[k],str+i,j-i); - lst[k][j-i]='\0'; - if(str[j]){ - k++; - j++; - } - } - if((j-1)>n) k--; //added by Sohn (Jan-17-08): "ab cd " -> 2 tokens - return k+1; //# tokens -} - - -long AbbrStra::num_token(const char *str) -{ - long i,j=0,k=0; - long n=strlen(str)-1; - - while(isblank(str[n])) n--; - - while(str[j]){ - while(isblank(str[j]))j++; - i=j; - while((str[j])&&(!isblank(str[j])))j++; - if(str[j]){ - k++; - j++; - } - } - if((j-1)>n) k--; //added by Sohn (Jan-17-08): "ab cd " -> 2 tokens - return k+1; //# tokens -} - - -// fch is 1st char of str token from backward -long AbbrStra::first_ch(const char *str, char *fch, long num) -{ - long i, j, numtk; - char tk[1000][1000]; - - numtk = tokenize(str,tk); - if(num>numtk) return 0; - - for(i=0; i=0; i--) - if(!isupper(str[i]) || !isalpha(str[i])) - return 0; - return 1; -} - -long AbbrStra::is_alpha(const char *str) -{ - for(long i=strlen(str)-1; i>=0; i--) - if(!isalpha(str[i])) - return 0; - return 1; -} - - -// str2 will lower-case of str1 -void AbbrStra::str_tolower(const char *str1, char *str2) -{ - long i=0; - - while(str1[i]) { - str2[i] = tolower(str1[i]); - i++; - } - str2[i] = '\0'; -} - -//copy num tokens from back of str1 to str2 -long AbbrStra::get_str(const char *str1, char *str2, long num) -{ - char ch, tk[1000][1000]; - long i, j, numtk; - - if(num<0) { cout<<"num<0\n"; exit(1); } - numtk = tokenize(str1,tk); - if(numtk=0; i--) { - if(isupper(tk[i][0])) j++; - else return j; - } - - return j; -} - -void AbbrStra::get_alpha(const char *str1, char *str2) -{ - long i = 0, j = 0; - long len = strlen(str1); - - while(i=0; i--) { - if(longf[i]=='(') paren++; - if(longf[i]==')') paren--; - if(longf[i]=='[') sbrac++; - if(longf[i]==']') sbrac--; - } - if(paren!=0 || sbrac!=0) return false; - - s.assign(shrtf); - l.assign(longf); - - for(i=0; i=0) { - loop1: while((tkloc>=0)&&(tok[tkinx][tkloc]!=abbr[sfloc])) tkloc--; - if(tkloc<0) { - tkinx--; - if(tkinx<0) return 0; //moved to here (Sep-14-07) - tkloc=strlen(tok[tkinx])-1; - } - else { - if(sfloc==0) { - if(tkloc!=0) { - if(!first) { tkloc--; goto loop1; } - else if(isalnum(tok[tkinx][tkloc-1])) { tkloc--; goto loop1; } - } - } - mod[sfloc][0]=tkinx; - mod[sfloc][1]=tkloc; - sfloc--; tkloc--; - } - } - - return 1; -} - -long AbbrStra::search_backward_adv(const char *abbr, bool flag) -{ - long i; - long lna=strlen(abbr); - - i=0; - while(i0) j+=k; - i++; - } - - if(j>0) return true; - else return false; -} - - -bool AbbrStra::exist_n_skipwords(long nsf, long n) -{ - long i=0, j, k; - bool flag=false; - - //k: # skip words - while(in) return false; - if(k==n) flag=true; - i++; - } - - if(flag) return true; - else return false; -} - -//exists n consecutive skip stopwords between tokens -bool AbbrStra::exist_n_stopwords(long nsf, long n) -{ - long i=0, j, k; - bool flag=false; - - while(in) return false; - if(k==n) flag=true; - if(k>0) { //skip word exists - while(k) { - if(!wData->stp.find(tok[mod[i][0]+k])) return false; - k--; - } - } - i++; - } - - if(flag) return true; - else return false; -} - - -bool AbbrStra::stopword_ok(long nsf, long nsw) -{ - long i=0, j, k; - - while(insw) return false; - if(k>0) { //skip word exists - while(k) { - if(!wData->stp.find(tok[mod[i][0]+k])) return false; - k--; - } - } - i++; - } - - return true; -} - -bool AbbrStra::skip_stop_ok(long nsf, long nsw, long n) -{ - long i=0, j, k, nstp; - - while(insw) return false; - //if(k>0) { //skip word exists - if(k>(nsw-n)) { - nstp=0; //# skiped stopword between tokens - while(k) { - if(wData->stp.find(tok[mod[i][0]+k])) nstp++; - k--; - } - if(nstp0)&&(k!=nsw)) return false; - if(k>0) { //skip word exists - nstp=0; //# skiped stopword between tokens - while(k) { - if(wData->stp.find(tok[mod[i][0]+k])) nstp++; - k--; - } - if(nstpnsw) return false; - i++; - } - - return true; -} - - -bool AbbrStra::is_subword(long nsf) -{ - long i=0; - char word[1000]; - - while(iwrdset.count(word)==0) return false; - } - i++; - } - - return true; -} - - -bool AbbrStra::is_BeginWrdMatch(long nsf, bool general) -{ - long i=0, j; - bool *bwm = new bool [ntk]; //BeginWrdMatch of a given tok - - for(j=0; j0) wwm++; - } - else { - if(mod[i][1]>0 && isalnum(tok[mod[i][0]][mod[i][1]-1])) wwm++; - } - i++; - } - - if(wwm>0) return true; - else return false; -} - - -bool AbbrStra::is_FirstLetMatch(long nsf, bool general) -{ - long i=0, flm=0, flm2=0; - - while(i=1) ) return true; - else return false; -} - - -bool AbbrStra::is_FirstLetSMatch(const char *abbr, bool general) -{ - long i=0, j=strlen(abbr)-1, flm=0, lsm=0; - - while(i=2) return true; - else return false; -} -//---- - - -//---1st ch must be alnum & at least one alphabet for all -//str1: sf -bool AbbrStra::set_condition(const char *str1) -{ - int n=0, m=0, o=0; - - switch(setCondition) { - case 1: //all alphabet SFs - for(long i=strlen(str1)-1; i>=0; i--) - if(!isalpha(str1[i])) - return false; - return true; - break; - case 2: //at least one non-alphabet - if(!isalnum(str1[0])) return false; - for(long i=strlen(str1)-1; i>=0; i--) { - if(isalpha(str1[i])) n++; - else m++; - } - if( (n>0) && (m>0) ) return true; - else return false; - break; - case 3: //only alnum & at least one num - for(long i=strlen(str1)-1; i>=0; i--) { - if(!isalnum(str1[i])) return false; - if(isalpha(str1[i])) n++; - if(isdigit(str1[i])) m++; - } - if( (n>0) && (m>0) ) return true; - else return false; - break; - case 4: //only alpha and non-alnum & at least one non-alnum - if(!isalpha(str1[0])) return false; - for(long i=strlen(str1)-1; i>=0; i--) { - if(isdigit(str1[i])) return false; - if(!isalnum(str1[i])) n++; - } - if(n>0) return true; - else return false; - break; - case 5: //at least one non-alnum - if(!isalnum(str1[0])) return false; - for(long i=strlen(str1)-1; i>0; i--) { - if(!isalnum(str1[i])) return true; - } - return false; - break; - case 6: //at least one num and non-alnum - if(!isalnum(str1[0])) return false; - for(long i=strlen(str1)-1; i>=0; i--) { - if(isalpha(str1[i])) n++; - if(isdigit(str1[i])) m++; - if(!isalnum(str1[i])) o++; - } - if( (n>0) && (m>0) && (o>0) ) return true; - else return false; - break; - case 7: //1+2 (SH algorithm) - if(!isalnum(str1[0])) return false; - for(long i=strlen(str1)-1; i>=0; i--) - if(isalpha(str1[i])) return true; - return false; - break; - default: - cout << "Not defined set condition\n"; - exit(1); - } -} - -//--- -//same as FirstLet::set_condition -//but requires extra set conditions -bool FirstLetOneChSF::set_condition(const char *shrtf, const char *longf, char *str) -{ - long i=0, len=strlen(shrtf), numtk; - char tk[1000][1000]; - - //sf conditions: all alphabet - while(i=|SF|, 1st ch of words must be alphabet - numtk = tokenize(longf,tk); - if(len>numtk) return false; - - for(i=0; istp.find(phrl)) return 0; //last token is stopword - if(!wData->lfs.find(phrl)) return 0; //lfs (1-ch sf) for FirstLet match cases < 2 - - token(text,tok); - lna = strlen(sf); - lnt = strlen(tok[ntk-1]); - - flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); - if(!flag) return 0; - - do { - if(!skipword_ok(lna,0)) continue; - if(!is_FirstLetMatch(lna,genFL)) continue; //not allow 1-alpha - - extract_lf(mod[0][0],ntk-1,str_); - return 1; - } while(search_backward_adv(sf,genFL)); - - return 0; -} -//--- - -bool FirstLet::set_condition(const char *shrtf, const char *longf, char *str) -{ - long i=0, len=strlen(shrtf), numtk; - char tk[1000][1000]; - - //sf conditions - while(inumtk) return false; - - for(i=0; i=0; i--) { - if(!isupper(str[i])) return false; - if(!isalpha(str[i])) return false; //necessary? - } - - return true; -} - - -long FirstLetGenS::strategy(const char *sf_, const char *str_) -{ - long lna,lnt,flag; - bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't - - if(!set_condition(sf_)) return 0; - - str_tolower(sf_,sf); - str_tolower(str_,text); - - token(text,tok); - lna = strlen(sf); - lnt = strlen(tok[ntk-1]); - - flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); - if(!flag) return 0; - - do { - if(!skipword_ok(lna,0)) continue; - if(!is_FirstLetSMatch(sf,genFL)) continue; - - extract_lf(mod[0][0],ntk-1,str_); - return 1; - } while(search_backward_adv(sf,genFL)); - - return 0; -} - - -long FirstLetGenStp::strategy(const char *sf_, const char *str_) -{ - long lna,lnt,flag; - bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't - - str_tolower(sf_,sf); - str_tolower(str_,text); - - token(text,tok); - lna = strlen(sf); - lnt = strlen(tok[ntk-1]); - - flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); - if(!flag) return 0; - - do { - if(!exist_skipword(lna)) continue; - if(!stopword_ok(lna,1)) continue; - if(!is_FirstLetMatch(lna,genFL)) continue; - - extract_lf(mod[0][0],ntk-1,str_); - return 1; - } while(search_backward_adv(sf,genFL)); - - return 0; -} - - -long FirstLetGenStp2::strategy(const char *sf_, const char *str_) -{ - long lna,lnt,flag; - bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't - - str_tolower(sf_,sf); - str_tolower(str_,text); - - token(text,tok); - lna = strlen(sf); - lnt = strlen(tok[ntk-1]); - - flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); - if(!flag) return 0; - - do { - if(!exist_n_stopwords(lna,2)) continue; - if(!is_FirstLetMatch(lna,genFL)) continue; - - extract_lf(mod[0][0],ntk-1,str_); - return 1; - } while(search_backward_adv(sf,genFL)); - - return 0; -} - - -long FirstLetGenSkp::strategy(const char *sf_, const char *str_) -{ - long lna,lnt,flag; - bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't - - str_tolower(sf_,sf); - str_tolower(str_,text); - - token(text,tok); - lna = strlen(sf); - lnt = strlen(tok[ntk-1]); - - flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); - if(!flag) return 0; - - do { - if(!exist_skipword(lna)) continue; - if(!skipword_ok(lna,1)) continue; - if(!is_FirstLetMatch(lna,genFL)) continue; - - extract_lf(mod[0][0],ntk-1,str_); - return 1; - } while(search_backward_adv(sf,genFL)); - - return 0; -} - - -long WithinWrdWrd::strategy(const char *sf_, const char *str_) -{ - long lna,lnt,flag; - bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't - - str_tolower(sf_,sf); - str_tolower(str_,text); - - token(text,tok); - lna = strlen(sf); - lnt = strlen(tok[ntk-1]); - - flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); - if(!flag) return 0; - - do { - if(!skipword_ok(lna,0)) continue; - if(!is_subword(lna)) continue; - if(!is_WithinWrdMatch(lna,genFL)) continue; - - extract_lf(mod[0][0],ntk-1,str_); - return 1; - } while(search_backward_adv(sf,genFL)); - - return 0; -} - - -long WithinWrdFWrd::strategy(const char *sf_, const char *str_) -{ - long lna,lnt,flag; - bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't - - str_tolower(sf_,sf); - str_tolower(str_,text); - - token(text,tok); - lna = strlen(sf); - lnt = strlen(tok[ntk-1]); - - flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); - if(!flag) return 0; - - do { - if(!skipword_ok(lna,0)) continue; - if(!is_subword(lna)) continue; - if(!is_BeginWrdMatch(lna,genFL)) continue; - if(!is_WithinWrdMatch(lna,genFL)) continue; - - extract_lf(mod[0][0],ntk-1,str_); - return 1; - } while(search_backward_adv(sf,genFL)); - - return 0; -} - - -long WithinWrdFWrdSkp::strategy(const char *sf_, const char *str_) -{ - long lna,lnt,flag; - bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't - - str_tolower(sf_,sf); - str_tolower(str_,text); - - token(text,tok); - lna = strlen(sf); - lnt = strlen(tok[ntk-1]); - - flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); - if(!flag) return 0; - - do { - if(!exist_skipword(lna)) continue; - if(!skipword_ok(lna,1)) continue; - if(!is_subword(lna)) continue; - if(!is_BeginWrdMatch(lna,genFL)) continue; - if(!is_WithinWrdMatch(lna,genFL)) continue; - - extract_lf(mod[0][0],ntk-1,str_); - return 1; - } while(search_backward_adv(sf,genFL)); - - return 0; -} - - -long WithinWrdLet::strategy(const char *sf_, const char *str_) -{ - long lna,lnt,flag; - bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't - - str_tolower(sf_,sf); - str_tolower(str_,text); - - token(text,tok); - lna = strlen(sf); - lnt = strlen(tok[ntk-1]); - - flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); - if(!flag) return 0; - - do { - if(!skipword_ok(lna,0)) continue; - if(!is_WithinWrdMatch(lna,genFL)) continue; - - extract_lf(mod[0][0],ntk-1,str_); - return 1; - } while(search_backward_adv(sf,genFL)); - - return 0; -} - - -long WithinWrdFLet::strategy(const char *sf_, const char *str_) -{ - long lna,lnt,flag; - bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't - - str_tolower(sf_,sf); - str_tolower(str_,text); - - token(text,tok); - lna = strlen(sf); - lnt = strlen(tok[ntk-1]); - - flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); - if(!flag) return 0; - - do { - if(!skipword_ok(lna,0)) continue; - if(!is_BeginWrdMatch(lna,genFL)) continue; - if(!is_WithinWrdMatch(lna,genFL)) continue; - - extract_lf(mod[0][0],ntk-1,str_); - return 1; - } while(search_backward_adv(sf,genFL)); - - return 0; -} - - -long WithinWrdFLetSkp::strategy(const char *sf_, const char *str_) -{ - long lna,lnt,flag; - bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't - - str_tolower(sf_,sf); - str_tolower(str_,text); - - token(text,tok); - lna = strlen(sf); - lnt = strlen(tok[ntk-1]); - - flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); - if(!flag) return 0; - - do { - if(!exist_skipword(lna)) continue; - if(!skipword_ok(lna,1)) continue; - if(!is_BeginWrdMatch(lna,genFL)) continue; - if(!is_WithinWrdMatch(lna,genFL)) continue; - - extract_lf(mod[0][0],ntk-1,str_); - return 1; - } while(search_backward_adv(sf,genFL)); - - return 0; -} - - -long ContLet::strategy(const char *sf_, const char *str_) -{ - long lna,lnt,flag; - bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't - - str_tolower(sf_,sf); - str_tolower(str_,text); - - token(text,tok); - lna = strlen(sf); - lnt = strlen(tok[ntk-1]); - - flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); - if(!flag) return 0; - - do { - if(!skipword_ok(lna,0)) continue; - if(!is_BeginWrdMatch(lna,genFL)) continue; - if(!is_ContLetMatch(lna)) continue; - - extract_lf(mod[0][0],ntk-1,str_); - return 1; - } while(search_backward_adv(sf,genFL)); - - return 0; -} - - -long ContLetSkp::strategy(const char *sf_, const char *str_) -{ - long lna,lnt,flag; - bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't - - str_tolower(sf_,sf); - str_tolower(str_,text); - - token(text,tok); - lna = strlen(sf); - lnt = strlen(tok[ntk-1]); - - flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); - if(!flag) return 0; - - do { - if(!exist_skipword(lna)) continue; - if(!skipword_ok(lna,1)) continue; - if(!is_BeginWrdMatch(lna,genFL)) continue; - if(!is_ContLetMatch(lna)) continue; - - extract_lf(mod[0][0],ntk-1,str_); - return 1; - } while(search_backward_adv(sf,genFL)); - - return 0; -} - - -long AnyLet::strategy(const char *sf_, const char *str_) -{ - long lna,lnt,flag; - bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't - - str_tolower(sf_,sf); - str_tolower(str_,text); - - token(text,tok); - lna = strlen(sf); - lnt = strlen(tok[ntk-1]); - - flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); - if(!flag) return 0; - - do { - if(!skipword_ok(lna,1)) continue; - - extract_lf(mod[0][0],ntk-1,str_); - return 1; - } while(search_backward_adv(sf,genFL)); - - return 0; -} - - - -//----- -AbbrStra * StratUtil::strat_factory(string name) -{ - if(name=="FirstLetOneChSF") return new FirstLetOneChSF; - else if(name=="FirstLet") return new FirstLet; - else if(name=="FirstLetGen") return new FirstLetGen; - else if(name=="FirstLetGen2") return new FirstLetGen2; - else if(name=="FirstLetGenS") return new FirstLetGenS; - else if(name=="FirstLetGenStp") return new FirstLetGenStp; - else if(name=="FirstLetGenStp2") return new FirstLetGenStp2; - else if(name=="FirstLetGenSkp") return new FirstLetGenSkp; - else if(name=="WithinWrdWrd") return new WithinWrdWrd; - else if(name=="WithinWrdFWrd") return new WithinWrdFWrd; - else if(name=="WithinWrdFWrdSkp") return new WithinWrdFWrdSkp; - else if(name=="WithinWrdLet") return new WithinWrdLet; - else if(name=="WithinWrdFLet") return new WithinWrdFLet; - else if(name=="WithinWrdFLetSkp") return new WithinWrdFLetSkp; - else if(name=="ContLet") return new ContLet; - else if(name=="ContLetSkp") return new ContLetSkp; - else if(name=="AnyLet") return new AnyLet; - else { cout << "Fail strat_factory\n"; exit(1); } -} - - -//check if sf is ok and assign a group -//if sf length > 5, use 5!! -//grp will be Al+#ChInSF, Num+#ChInSF, or Spec+#ChInSF -bool StratUtil::group_sf(const char *sf, string &grp) -{ - long i, j, len=strlen(sf); - long al=0, num=0, nonalnum=0; - long paren=0, sbrac=0; - - grp = ""; // if failure, no group - - if(!isalnum(sf[0])) return false; //1sf ch must alnum - for(i=0; i=0; i--) { - if(sf[i]=='(') paren++; - if(sf[i]==')') paren--; - if(sf[i]=='[') sbrac++; - if(sf[i]==']') sbrac--; - } - if(paren!=0 || sbrac!=0) return false; - - if(al==len) grp.assign("Al"); - else if(num>0) grp.assign("Num"); - else if(nonalnum>0) grp.assign("Spec"); - else { cout << "No sf group\n"; exit(1); } - - //append sf length - len = len>5 ? 5 : len; - - switch(len) { - case 1: - grp.append("1"); - break; - case 2: - grp.append("2"); - break; - case 3: - grp.append("3"); - break; - case 4: - grp.append("4"); - break; - case 5: - grp.append("5"); - break; - default: - cout << "Not defined #-ch SF" << endl; - exit(1); - } - - return true; -} - -//add the condition |lf|>|sf| -bool StratUtil::group_sf(const char *sf, const char *lf, string &grp) -{ - long i, j, len=strlen(sf); - long al=0, num=0, nonalnum=0; - long paren=0, sbrac=0; - - if(strlen(lf)|sf| - if(!isalnum(sf[0])) return false; //1sf ch must alnum - for(i=0; i10) return false; //|alpha sf| is at most 10 - if(num_token(sf)>2) return false; //added Feb-21-08 - - //false for one parenthesis or square bracket - for(i=len-1; i>=0; i--) { - if(sf[i]=='(') paren++; - if(sf[i]==')') paren--; - if(sf[i]=='[') sbrac++; - if(sf[i]==']') sbrac--; - } - if(paren!=0 || sbrac!=0) return false; - - if(al==len) grp.assign("Al"); - else if(num>0) grp.assign("Num"); - else if(nonalnum>0) grp.assign("Spec"); - else { cout << "No sf group\n"; exit(1); } - - //append sf length - len = len>5 ? 5 : len; - - switch(len) { - case 1: - grp.append("1"); - break; - case 2: - grp.append("2"); - break; - case 3: - grp.append("3"); - break; - case 4: - grp.append("4"); - break; - case 5: - grp.append("5"); - break; - default: - cout << "Not defined #-ch SF" << endl; - exit(1); - } - - return true; -} - - -//remove non-alnum in str1 and save it to str2 -void StratUtil::remove_nonAlnum(const char *str1, char *str2) -{ - long i=0, j=0; - - while(str1[i]) { - if(isalnum(str1[i])) { - str2[j] = str1[i]; - j++; - } - i++; - } - str2[j] = '\0'; -} - - -vector StratUtil::get_strats(string s) -{ - if(s=="Al1") return Al1; - else if(s=="Al2") return Al2; - else if(s=="Al3") return Al3; - else if(s=="Al4") return Al4; - else if(s=="Al5") return Al5; - else if(s=="Num2") return Num2; - else if(s=="Num3") return Num3; - else if(s=="Num4") return Num4; - else if(s=="Num5") return Num5; - else if(s=="Spec2") return Spec2; - else if(s=="Spec3") return Spec3; - else if(s=="Spec4") return Spec4; - else if(s=="Spec5") return Spec5; - else { cout << "Incorrect name\n"; exit(1); } -} - - -void StratUtil::push_back_strat(string sgp, string strat) -{ - if(sgp=="Al1") Al1.push_back(strat); - else if(sgp=="Al2") Al2.push_back(strat); - else if(sgp=="Al3") Al3.push_back(strat); - else if(sgp=="Al4") Al4.push_back(strat); - else if(sgp=="Al5") Al5.push_back(strat); - else if(sgp=="Num2") Num2.push_back(strat); - else if(sgp=="Num3") Num3.push_back(strat); - else if(sgp=="Num4") Num4.push_back(strat); - else if(sgp=="Num5") Num5.push_back(strat); - else if(sgp=="Spec2") Spec2.push_back(strat); - else if(sgp=="Spec3") Spec3.push_back(strat); - else if(sgp=="Spec4") Spec4.push_back(strat); - else if(sgp=="Spec5") Spec5.push_back(strat); -} - - -long StratUtil::exist_upperal(const char *str) -{ - long i, len=strlen(str); - - for(i=0; in) k--; //added by Sohn (Jan-17-08): "ab cd " -> 2 tokens - return k+1; //# tokens -} -//----- +#include "AbbrStra.h" +#include +#include +#include +#include + + +WordData::WordData(const char *wrdnam, const char *stpnam, + const char *lfsnam) : + wrdset(wrdnam), stp(stpnam), lfs(lfsnam) +{ + wrdset.set_path_name("Ab3P"); + wrdset.gopen_ctable_map(); + stp.set_path_name("Ab3P"); + stp.gopen_htable_map(); + lfs.set_path_name("Ab3P"); + lfs.gopen_htable_map(); +} + +WordData::~WordData() +{ + wrdset.gclose_ctable_map(); + stp.gclose_htable_map(); + lfs.gclose_htable_map(); +} + + +AbbrStra::AbbrStra() +{ + npairs = tpairs = nsfs = nmatchs = amatchs = 0; +} + + +AbbrStra::~AbbrStra() +{ +} + + +void AbbrStra::token(const char *str, char lst[1000][1000]) +{ + long i,j=0,k=0; + long n=strlen(str)-1; + + while(isblank(str[n])) n--; + + while(str[j]){ + while(isblank(str[j]))j++; + i=j; + while((str[j])&&(!isblank(str[j])))j++; + strncpy(lst[k],str+i,j-i); + lst[k][j-i]='\0'; + if(str[j]){ + k++; + j++; + } + } + if((j-1)>n) k--; //added by Sohn (Jan-17-08): "ab cd " -> 2 tokens + ntk=k+1; //# tokens, ntk is data member +} + + +long AbbrStra::tokenize(const char *str, char lst[1000][1000]) +{ + long i,j=0,k=0; + long n=strlen(str)-1; + + while(isblank(str[n])) n--; + + while(str[j]){ + while(isblank(str[j]))j++; + i=j; + while((str[j])&&(!isblank(str[j])))j++; + strncpy(lst[k],str+i,j-i); + lst[k][j-i]='\0'; + if(str[j]){ + k++; + j++; + } + } + if((j-1)>n) k--; //added by Sohn (Jan-17-08): "ab cd " -> 2 tokens + return k+1; //# tokens +} + + +long AbbrStra::num_token(const char *str) +{ + long i,j=0,k=0; + long n=strlen(str)-1; + + while(isblank(str[n])) n--; + + while(str[j]){ + while(isblank(str[j]))j++; + i=j; + while((str[j])&&(!isblank(str[j])))j++; + if(str[j]){ + k++; + j++; + } + } + if((j-1)>n) k--; //added by Sohn (Jan-17-08): "ab cd " -> 2 tokens + return k+1; //# tokens +} + + +// fch is 1st char of str token from backward +long AbbrStra::first_ch(const char *str, char *fch, long num) +{ + long i, j, numtk; + char tk[1000][1000]; + + numtk = tokenize(str,tk); + if(num>numtk) return 0; + + for(i=0; i=0; i--) + if(!isupper(str[i]) || !isalpha(str[i])) + return 0; + return 1; +} + +long AbbrStra::is_alpha(const char *str) +{ + for(long i=strlen(str)-1; i>=0; i--) + if(!isalpha(str[i])) + return 0; + return 1; +} + + +// str2 will lower-case of str1 +void AbbrStra::str_tolower(const char *str1, char *str2) +{ + long i=0; + + while(str1[i]) { + str2[i] = tolower(str1[i]); + i++; + } + str2[i] = '\0'; +} + +//copy num tokens from back of str1 to str2 +long AbbrStra::get_str(const char *str1, char *str2, long num) +{ + char ch, tk[1000][1000]; + long i, j, numtk; + + if(num<0) { cout<<"num<0\n"; exit(1); } + numtk = tokenize(str1,tk); + if(numtk=0; i--) { + if(isupper(tk[i][0])) j++; + else return j; + } + + return j; +} + +void AbbrStra::get_alpha(const char *str1, char *str2) +{ + long i = 0, j = 0; + long len = strlen(str1); + + while(i=0; i--) { + if(longf[i]=='(') paren++; + if(longf[i]==')') paren--; + if(longf[i]=='[') sbrac++; + if(longf[i]==']') sbrac--; + } + if(paren!=0 || sbrac!=0) return false; + + s.assign(shrtf); + l.assign(longf); + + for(i=0; i=0) { + loop1: while((tkloc>=0)&&(tok[tkinx][tkloc]!=abbr[sfloc])) tkloc--; + if(tkloc<0) { + tkinx--; + if(tkinx<0) return 0; //moved to here (Sep-14-07) + tkloc=strlen(tok[tkinx])-1; + } + else { + if(sfloc==0) { + if(tkloc!=0) { + if(!first) { tkloc--; goto loop1; } + else if(isalnum(tok[tkinx][tkloc-1])) { tkloc--; goto loop1; } + } + } + mod[sfloc][0]=tkinx; + mod[sfloc][1]=tkloc; + sfloc--; tkloc--; + } + } + + return 1; +} + +long AbbrStra::search_backward_adv(const char *abbr, bool flag) +{ + long i; + long lna=strlen(abbr); + + i=0; + while(i0) j+=k; + i++; + } + + if(j>0) return true; + else return false; +} + + +bool AbbrStra::exist_n_skipwords(long nsf, long n) +{ + long i=0, j, k; + bool flag=false; + + //k: # skip words + while(in) return false; + if(k==n) flag=true; + i++; + } + + if(flag) return true; + else return false; +} + +//exists n consecutive skip stopwords between tokens +bool AbbrStra::exist_n_stopwords(long nsf, long n) +{ + long i=0, j, k; + bool flag=false; + + while(in) return false; + if(k==n) flag=true; + if(k>0) { //skip word exists + while(k) { + if(!wData->stp.find(tok[mod[i][0]+k])) return false; + k--; + } + } + i++; + } + + if(flag) return true; + else return false; +} + + +bool AbbrStra::stopword_ok(long nsf, long nsw) +{ + long i=0, j, k; + + while(insw) return false; + if(k>0) { //skip word exists + while(k) { + if(!wData->stp.find(tok[mod[i][0]+k])) return false; + k--; + } + } + i++; + } + + return true; +} + +bool AbbrStra::skip_stop_ok(long nsf, long nsw, long n) +{ + long i=0, j, k, nstp; + + while(insw) return false; + //if(k>0) { //skip word exists + if(k>(nsw-n)) { + nstp=0; //# skiped stopword between tokens + while(k) { + if(wData->stp.find(tok[mod[i][0]+k])) nstp++; + k--; + } + if(nstp0)&&(k!=nsw)) return false; + if(k>0) { //skip word exists + nstp=0; //# skiped stopword between tokens + while(k) { + if(wData->stp.find(tok[mod[i][0]+k])) nstp++; + k--; + } + if(nstpnsw) return false; + i++; + } + + return true; +} + + +bool AbbrStra::is_subword(long nsf) +{ + long i=0; + char word[1000]; + + while(iwrdset.count(word)==0) return false; + } + i++; + } + + return true; +} + + +bool AbbrStra::is_BeginWrdMatch(long nsf, bool general) +{ + long i=0, j; + bool *bwm = new bool [ntk]; //BeginWrdMatch of a given tok + + for(j=0; j0) wwm++; + } + else { + if(mod[i][1]>0 && isalnum(tok[mod[i][0]][mod[i][1]-1])) wwm++; + } + i++; + } + + if(wwm>0) return true; + else return false; +} + + +bool AbbrStra::is_FirstLetMatch(long nsf, bool general) +{ + long i=0, flm=0, flm2=0; + + while(i=1) ) return true; + else return false; +} + + +bool AbbrStra::is_FirstLetSMatch(const char *abbr, bool general) +{ + long i=0, j=strlen(abbr)-1, flm=0, lsm=0; + + while(i=2) return true; + else return false; +} +//---- + + +//---1st ch must be alnum & at least one alphabet for all +//str1: sf +bool AbbrStra::set_condition(const char *str1) +{ + int n=0, m=0, o=0; + + switch(setCondition) { + case 1: //all alphabet SFs + for(long i=strlen(str1)-1; i>=0; i--) + if(!isalpha(str1[i])) + return false; + return true; + break; + case 2: //at least one non-alphabet + if(!isalnum(str1[0])) return false; + for(long i=strlen(str1)-1; i>=0; i--) { + if(isalpha(str1[i])) n++; + else m++; + } + if( (n>0) && (m>0) ) return true; + else return false; + break; + case 3: //only alnum & at least one num + for(long i=strlen(str1)-1; i>=0; i--) { + if(!isalnum(str1[i])) return false; + if(isalpha(str1[i])) n++; + if(isdigit(str1[i])) m++; + } + if( (n>0) && (m>0) ) return true; + else return false; + break; + case 4: //only alpha and non-alnum & at least one non-alnum + if(!isalpha(str1[0])) return false; + for(long i=strlen(str1)-1; i>=0; i--) { + if(isdigit(str1[i])) return false; + if(!isalnum(str1[i])) n++; + } + if(n>0) return true; + else return false; + break; + case 5: //at least one non-alnum + if(!isalnum(str1[0])) return false; + for(long i=strlen(str1)-1; i>0; i--) { + if(!isalnum(str1[i])) return true; + } + return false; + break; + case 6: //at least one num and non-alnum + if(!isalnum(str1[0])) return false; + for(long i=strlen(str1)-1; i>=0; i--) { + if(isalpha(str1[i])) n++; + if(isdigit(str1[i])) m++; + if(!isalnum(str1[i])) o++; + } + if( (n>0) && (m>0) && (o>0) ) return true; + else return false; + break; + case 7: //1+2 (SH algorithm) + if(!isalnum(str1[0])) return false; + for(long i=strlen(str1)-1; i>=0; i--) + if(isalpha(str1[i])) return true; + return false; + break; + default: + cout << "Not defined set condition\n"; + exit(1); + } +} + +//--- +//same as FirstLet::set_condition +//but requires extra set conditions +bool FirstLetOneChSF::set_condition(const char *shrtf, const char *longf, char *str) +{ + long i=0, len=strlen(shrtf), numtk; + char tk[1000][1000]; + + //sf conditions: all alphabet + while(i=|SF|, 1st ch of words must be alphabet + numtk = tokenize(longf,tk); + if(len>numtk) return false; + + for(i=0; istp.find(phrl)) return 0; //last token is stopword + if(!wData->lfs.find(phrl)) return 0; //lfs (1-ch sf) for FirstLet match cases < 2 + + token(text,tok); + lna = strlen(sf); + lnt = strlen(tok[ntk-1]); + + flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); + if(!flag) return 0; + + do { + if(!skipword_ok(lna,0)) continue; + if(!is_FirstLetMatch(lna,genFL)) continue; //not allow 1-alpha + + extract_lf(mod[0][0],ntk-1,str_); + return 1; + } while(search_backward_adv(sf,genFL)); + + return 0; +} +//--- + +bool FirstLet::set_condition(const char *shrtf, const char *longf, char *str) +{ + long i=0, len=strlen(shrtf), numtk; + char tk[1000][1000]; + + //sf conditions + while(inumtk) return false; + + for(i=0; i=0; i--) { + if(!isupper(str[i])) return false; + if(!isalpha(str[i])) return false; //necessary? + } + + return true; +} + + +long FirstLetGenS::strategy(const char *sf_, const char *str_) +{ + long lna,lnt,flag; + bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't + + if(!set_condition(sf_)) return 0; + + str_tolower(sf_,sf); + str_tolower(str_,text); + + token(text,tok); + lna = strlen(sf); + lnt = strlen(tok[ntk-1]); + + flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); + if(!flag) return 0; + + do { + if(!skipword_ok(lna,0)) continue; + if(!is_FirstLetSMatch(sf,genFL)) continue; + + extract_lf(mod[0][0],ntk-1,str_); + return 1; + } while(search_backward_adv(sf,genFL)); + + return 0; +} + + +long FirstLetGenStp::strategy(const char *sf_, const char *str_) +{ + long lna,lnt,flag; + bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't + + str_tolower(sf_,sf); + str_tolower(str_,text); + + token(text,tok); + lna = strlen(sf); + lnt = strlen(tok[ntk-1]); + + flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); + if(!flag) return 0; + + do { + if(!exist_skipword(lna)) continue; + if(!stopword_ok(lna,1)) continue; + if(!is_FirstLetMatch(lna,genFL)) continue; + + extract_lf(mod[0][0],ntk-1,str_); + return 1; + } while(search_backward_adv(sf,genFL)); + + return 0; +} + + +long FirstLetGenStp2::strategy(const char *sf_, const char *str_) +{ + long lna,lnt,flag; + bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't + + str_tolower(sf_,sf); + str_tolower(str_,text); + + token(text,tok); + lna = strlen(sf); + lnt = strlen(tok[ntk-1]); + + flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); + if(!flag) return 0; + + do { + if(!exist_n_stopwords(lna,2)) continue; + if(!is_FirstLetMatch(lna,genFL)) continue; + + extract_lf(mod[0][0],ntk-1,str_); + return 1; + } while(search_backward_adv(sf,genFL)); + + return 0; +} + + +long FirstLetGenSkp::strategy(const char *sf_, const char *str_) +{ + long lna,lnt,flag; + bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't + + str_tolower(sf_,sf); + str_tolower(str_,text); + + token(text,tok); + lna = strlen(sf); + lnt = strlen(tok[ntk-1]); + + flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); + if(!flag) return 0; + + do { + if(!exist_skipword(lna)) continue; + if(!skipword_ok(lna,1)) continue; + if(!is_FirstLetMatch(lna,genFL)) continue; + + extract_lf(mod[0][0],ntk-1,str_); + return 1; + } while(search_backward_adv(sf,genFL)); + + return 0; +} + + +long WithinWrdWrd::strategy(const char *sf_, const char *str_) +{ + long lna,lnt,flag; + bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't + + str_tolower(sf_,sf); + str_tolower(str_,text); + + token(text,tok); + lna = strlen(sf); + lnt = strlen(tok[ntk-1]); + + flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); + if(!flag) return 0; + + do { + if(!skipword_ok(lna,0)) continue; + if(!is_subword(lna)) continue; + if(!is_WithinWrdMatch(lna,genFL)) continue; + + extract_lf(mod[0][0],ntk-1,str_); + return 1; + } while(search_backward_adv(sf,genFL)); + + return 0; +} + + +long WithinWrdFWrd::strategy(const char *sf_, const char *str_) +{ + long lna,lnt,flag; + bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't + + str_tolower(sf_,sf); + str_tolower(str_,text); + + token(text,tok); + lna = strlen(sf); + lnt = strlen(tok[ntk-1]); + + flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); + if(!flag) return 0; + + do { + if(!skipword_ok(lna,0)) continue; + if(!is_subword(lna)) continue; + if(!is_BeginWrdMatch(lna,genFL)) continue; + if(!is_WithinWrdMatch(lna,genFL)) continue; + + extract_lf(mod[0][0],ntk-1,str_); + return 1; + } while(search_backward_adv(sf,genFL)); + + return 0; +} + + +long WithinWrdFWrdSkp::strategy(const char *sf_, const char *str_) +{ + long lna,lnt,flag; + bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't + + str_tolower(sf_,sf); + str_tolower(str_,text); + + token(text,tok); + lna = strlen(sf); + lnt = strlen(tok[ntk-1]); + + flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); + if(!flag) return 0; + + do { + if(!exist_skipword(lna)) continue; + if(!skipword_ok(lna,1)) continue; + if(!is_subword(lna)) continue; + if(!is_BeginWrdMatch(lna,genFL)) continue; + if(!is_WithinWrdMatch(lna,genFL)) continue; + + extract_lf(mod[0][0],ntk-1,str_); + return 1; + } while(search_backward_adv(sf,genFL)); + + return 0; +} + + +long WithinWrdLet::strategy(const char *sf_, const char *str_) +{ + long lna,lnt,flag; + bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't + + str_tolower(sf_,sf); + str_tolower(str_,text); + + token(text,tok); + lna = strlen(sf); + lnt = strlen(tok[ntk-1]); + + flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); + if(!flag) return 0; + + do { + if(!skipword_ok(lna,0)) continue; + if(!is_WithinWrdMatch(lna,genFL)) continue; + + extract_lf(mod[0][0],ntk-1,str_); + return 1; + } while(search_backward_adv(sf,genFL)); + + return 0; +} + + +long WithinWrdFLet::strategy(const char *sf_, const char *str_) +{ + long lna,lnt,flag; + bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't + + str_tolower(sf_,sf); + str_tolower(str_,text); + + token(text,tok); + lna = strlen(sf); + lnt = strlen(tok[ntk-1]); + + flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); + if(!flag) return 0; + + do { + if(!skipword_ok(lna,0)) continue; + if(!is_BeginWrdMatch(lna,genFL)) continue; + if(!is_WithinWrdMatch(lna,genFL)) continue; + + extract_lf(mod[0][0],ntk-1,str_); + return 1; + } while(search_backward_adv(sf,genFL)); + + return 0; +} + + +long WithinWrdFLetSkp::strategy(const char *sf_, const char *str_) +{ + long lna,lnt,flag; + bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't + + str_tolower(sf_,sf); + str_tolower(str_,text); + + token(text,tok); + lna = strlen(sf); + lnt = strlen(tok[ntk-1]); + + flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); + if(!flag) return 0; + + do { + if(!exist_skipword(lna)) continue; + if(!skipword_ok(lna,1)) continue; + if(!is_BeginWrdMatch(lna,genFL)) continue; + if(!is_WithinWrdMatch(lna,genFL)) continue; + + extract_lf(mod[0][0],ntk-1,str_); + return 1; + } while(search_backward_adv(sf,genFL)); + + return 0; +} + + +long ContLet::strategy(const char *sf_, const char *str_) +{ + long lna,lnt,flag; + bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't + + str_tolower(sf_,sf); + str_tolower(str_,text); + + token(text,tok); + lna = strlen(sf); + lnt = strlen(tok[ntk-1]); + + flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); + if(!flag) return 0; + + do { + if(!skipword_ok(lna,0)) continue; + if(!is_BeginWrdMatch(lna,genFL)) continue; + if(!is_ContLetMatch(lna)) continue; + + extract_lf(mod[0][0],ntk-1,str_); + return 1; + } while(search_backward_adv(sf,genFL)); + + return 0; +} + + +long ContLetSkp::strategy(const char *sf_, const char *str_) +{ + long lna,lnt,flag; + bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't + + str_tolower(sf_,sf); + str_tolower(str_,text); + + token(text,tok); + lna = strlen(sf); + lnt = strlen(tok[ntk-1]); + + flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); + if(!flag) return 0; + + do { + if(!exist_skipword(lna)) continue; + if(!skipword_ok(lna,1)) continue; + if(!is_BeginWrdMatch(lna,genFL)) continue; + if(!is_ContLetMatch(lna)) continue; + + extract_lf(mod[0][0],ntk-1,str_); + return 1; + } while(search_backward_adv(sf,genFL)); + + return 0; +} + + +long AnyLet::strategy(const char *sf_, const char *str_) +{ + long lna,lnt,flag; + bool genFL=true; //1:allow 1-ahpha for 1ch of SF match, 0:don't + + str_tolower(sf_,sf); + str_tolower(str_,text); + + token(text,tok); + lna = strlen(sf); + lnt = strlen(tok[ntk-1]); + + flag = search_backward(lna-1,ntk-1,lnt-1,sf,genFL); + if(!flag) return 0; + + do { + if(!skipword_ok(lna,1)) continue; + + extract_lf(mod[0][0],ntk-1,str_); + return 1; + } while(search_backward_adv(sf,genFL)); + + return 0; +} + + + +//----- +AbbrStra * StratUtil::strat_factory(string name) +{ + if(name=="FirstLetOneChSF") return new FirstLetOneChSF; + else if(name=="FirstLet") return new FirstLet; + else if(name=="FirstLetGen") return new FirstLetGen; + else if(name=="FirstLetGen2") return new FirstLetGen2; + else if(name=="FirstLetGenS") return new FirstLetGenS; + else if(name=="FirstLetGenStp") return new FirstLetGenStp; + else if(name=="FirstLetGenStp2") return new FirstLetGenStp2; + else if(name=="FirstLetGenSkp") return new FirstLetGenSkp; + else if(name=="WithinWrdWrd") return new WithinWrdWrd; + else if(name=="WithinWrdFWrd") return new WithinWrdFWrd; + else if(name=="WithinWrdFWrdSkp") return new WithinWrdFWrdSkp; + else if(name=="WithinWrdLet") return new WithinWrdLet; + else if(name=="WithinWrdFLet") return new WithinWrdFLet; + else if(name=="WithinWrdFLetSkp") return new WithinWrdFLetSkp; + else if(name=="ContLet") return new ContLet; + else if(name=="ContLetSkp") return new ContLetSkp; + else if(name=="AnyLet") return new AnyLet; + else { cout << "Fail strat_factory\n"; exit(1); } +} + + +//check if sf is ok and assign a group +//if sf length > 5, use 5!! +//grp will be Al+#ChInSF, Num+#ChInSF, or Spec+#ChInSF +bool StratUtil::group_sf(const char *sf, string &grp) +{ + long i, j, len=strlen(sf); + long al=0, num=0, nonalnum=0; + long paren=0, sbrac=0; + + grp = ""; // if failure, no group + + if(!isalnum(sf[0])) return false; //1sf ch must alnum + for(i=0; i=0; i--) { + if(sf[i]=='(') paren++; + if(sf[i]==')') paren--; + if(sf[i]=='[') sbrac++; + if(sf[i]==']') sbrac--; + } + if(paren!=0 || sbrac!=0) return false; + + if(al==len) grp.assign("Al"); + else if(num>0) grp.assign("Num"); + else if(nonalnum>0) grp.assign("Spec"); + else { cout << "No sf group\n"; exit(1); } + + //append sf length + len = len>5 ? 5 : len; + + switch(len) { + case 1: + grp.append("1"); + break; + case 2: + grp.append("2"); + break; + case 3: + grp.append("3"); + break; + case 4: + grp.append("4"); + break; + case 5: + grp.append("5"); + break; + default: + cout << "Not defined #-ch SF" << endl; + exit(1); + } + + return true; +} + +//add the condition |lf|>|sf| +bool StratUtil::group_sf(const char *sf, const char *lf, string &grp) +{ + long i, j, len=strlen(sf); + long al=0, num=0, nonalnum=0; + long paren=0, sbrac=0; + + if(strlen(lf)|sf| + if(!isalnum(sf[0])) return false; //1sf ch must alnum + for(i=0; i10) return false; //|alpha sf| is at most 10 + if(num_token(sf)>2) return false; //added Feb-21-08 + + //false for one parenthesis or square bracket + for(i=len-1; i>=0; i--) { + if(sf[i]=='(') paren++; + if(sf[i]==')') paren--; + if(sf[i]=='[') sbrac++; + if(sf[i]==']') sbrac--; + } + if(paren!=0 || sbrac!=0) return false; + + if(al==len) grp.assign("Al"); + else if(num>0) grp.assign("Num"); + else if(nonalnum>0) grp.assign("Spec"); + else { cout << "No sf group\n"; exit(1); } + + //append sf length + len = len>5 ? 5 : len; + + switch(len) { + case 1: + grp.append("1"); + break; + case 2: + grp.append("2"); + break; + case 3: + grp.append("3"); + break; + case 4: + grp.append("4"); + break; + case 5: + grp.append("5"); + break; + default: + cout << "Not defined #-ch SF" << endl; + exit(1); + } + + return true; +} + + +//remove non-alnum in str1 and save it to str2 +void StratUtil::remove_nonAlnum(const char *str1, char *str2) +{ + long i=0, j=0; + + while(str1[i]) { + if(isalnum(str1[i])) { + str2[j] = str1[i]; + j++; + } + i++; + } + str2[j] = '\0'; +} + + +vector StratUtil::get_strats(string s) +{ + if(s=="Al1") return Al1; + else if(s=="Al2") return Al2; + else if(s=="Al3") return Al3; + else if(s=="Al4") return Al4; + else if(s=="Al5") return Al5; + else if(s=="Num2") return Num2; + else if(s=="Num3") return Num3; + else if(s=="Num4") return Num4; + else if(s=="Num5") return Num5; + else if(s=="Spec2") return Spec2; + else if(s=="Spec3") return Spec3; + else if(s=="Spec4") return Spec4; + else if(s=="Spec5") return Spec5; + else { cout << "Incorrect name\n"; exit(1); } +} + + +void StratUtil::push_back_strat(string sgp, string strat) +{ + if(sgp=="Al1") Al1.push_back(strat); + else if(sgp=="Al2") Al2.push_back(strat); + else if(sgp=="Al3") Al3.push_back(strat); + else if(sgp=="Al4") Al4.push_back(strat); + else if(sgp=="Al5") Al5.push_back(strat); + else if(sgp=="Num2") Num2.push_back(strat); + else if(sgp=="Num3") Num3.push_back(strat); + else if(sgp=="Num4") Num4.push_back(strat); + else if(sgp=="Num5") Num5.push_back(strat); + else if(sgp=="Spec2") Spec2.push_back(strat); + else if(sgp=="Spec3") Spec3.push_back(strat); + else if(sgp=="Spec4") Spec4.push_back(strat); + else if(sgp=="Spec5") Spec5.push_back(strat); +} + + +long StratUtil::exist_upperal(const char *str) +{ + long i, len=strlen(str); + + for(i=0; in) k--; //added by Sohn (Jan-17-08): "ab cd " -> 2 tokens + return k+1; //# tokens +} +//----- diff --git a/Library/AbbrStra.h b/Library/AbbrStra.h index 0dcb2b34370f1799ad509ee77fb41b595dc68b7b..bac1313e9e49858eea5945da34a2cf78958839f5 100644 --- a/Library/AbbrStra.h +++ b/Library/AbbrStra.h @@ -1,332 +1,332 @@ -#ifndef ABBRSTRA_H -#define ABBRSTRA_H - -#include -#include -#include - -using namespace std; -using namespace iret; - - -class WordData { -public: - WordData(const char *wrdname="wrdset3", const char *stpname="stop", - const char *lfsname="Lf1chSf"); - - ~WordData(); - - Chash wrdset; //sigle word in MEDLINE - Hash stp; //stopword - Hash lfs; //lfs (1-ch sf) for FirstLet match cases >=2 -}; - - -class AbbrStra { -public: - AbbrStra(); - ~AbbrStra(); - void token(const char *str, char lst[1000][1000]); // tokennize & set ntk - long tokenize(const char *str, char lst[1000][1000]); //tokennize & return # tokens - long num_token(const char *str); //return # tokens - long first_ch(const char *str, char *fch, long num); - long is_upperal(const char *str); - long is_alpha(const char *str); - void str_tolower(const char *str1, char *str2); - long get_str(const char *str1, char *str2, long num); - bool isupper_str(const char *str); - bool is_onealpha(const char *str); - long count_upperstr(const char *str); - //return # upper-case 1st letter of consecutive tokens (backward) - void get_alpha(const char *str1, char *str2); - //set str2 with only alphabet of str1 - bool lf_ok(const char *shrtf, const char *longf); - - virtual bool set_condition(const char *sf); - //must set nonAlphaSF=true if want to use SF containing non-alphabet - virtual long strategy(const char *sf, const char *str) = 0; - //sf & str will be lower-cased (OCt-25-2007) - long search_backward(long sloc, long tnum, long tloc, const char *sf, bool first); - //search backward to find match starting from sf[sloc] - //Returns 1 if matches. sf[0] must match with begin word - long search_backward_adv(const char *sf, bool first); - //Searches for next model setting. Returns 1 if finds one. - void extract_lf(long begin, long end); - //save strings from begin to end of tok to lf - void extract_lf(long begin, long end, const char *str); - //save strings from begin to end of str's tok to lf - - //---after set mod check conditions - //nsf:# ch in sf, nsw:# allowed skipword, general:true allow 1st ch match after non-alnum - bool exist_skipword(long nsf); - //true if at least one skip word exists - bool exist_n_skipwords(long nsf, long n); - //true if exist n consecutive skip words between tokens but cannot be more than n - bool exist_n_stopwords(long nsf, long n); - //true if exist n consecutive skip stopwords between tokens but cannot be more than n - bool stopword_ok(long nsf, long nsw); - //true if at most (can be 0) nsw skip stopword in row exists - bool skip_stop_ok(long nsf, long nsw, long n); - //true if at most (can be 0) nsw skip word, which include at least n stopwords, in row exists - bool skip_stop_ok2(long nsf, long nsw, long n); - //true if nsw skip word, which include at least n stopwords, in row exists - bool skipword_ok(long nsf, long nsw); - //true if at most (can be 0) nsw skip word in row exists - bool is_subword(long nsf); - //true if matching string is begin of a tok or a word in wrdlist - bool is_BeginWrdMatch(long nsf, bool general); - //true if begining ch of a word match - //if general is true, allow match after non-alnum (eg, 1-alpha) - bool is_WithinWrdMatch(long nsf, bool general); - //true if within word match - //if general is true, 1-Alpha: 'A' is not within word match - bool is_FirstLetMatch(long nsf, bool general); - //true if each ch of sf match with 1st ch of word - //(true: Alpha anyword Beta (AB)) - //if general=true, true: 1-Alpha Beta, Alpha-Beta - bool is_FirstLetMatch2(long nsf, bool general); - //at least one 1-Alpha - bool is_FirstLetSMatch(const char *sf, bool general); - //true if first letter match & 's' match with last ch of lf - bool is_ContLetMatch(long nsf); - //true if two or more consecutive letter match - //--- - - char *pch; //sf applied to a strategy - char *ps, *pl; //sf, potential lf - char sf[100], text[10000]; //sf & potential lf used in a strategy - char lf[10000]; //lf found by a strategy - char tok[1000][1000]; //token of potential lf - //lower after strategy, original after extract_lf(b,e,str) - long ntk; //# tokens - long mod[100][2]; //match locations of tok with a given sf - //mod[sf_inx][0]=tok inx, mod[sf_inx][1]=match loc in tok[mod[sf_inx][0]] - - //for each n_ch-SF - long npairs; //selected pairs for this strategy - long tpairs; //total pairs - long nsfs; //# selected unique sfs for this strategy - long nmatchs; //# matchs (success strategy & given sf == real sf) - long amatchs; //# accumulated matchs up to this strategy - long setCondition; //SF condition - long greaterEqNsf; //if 1 select SF |SF|>=nsf - - WordData *wData; -}; - - -/* -alpha beta gamma (ABG) - */ -class FirstLet : public AbbrStra { -public: - virtual bool set_condition(const char *str1, const char *str2, char *str); - virtual long strategy(const char *sf, const char *str); -}; - - -class FirstLetOneChSF : public AbbrStra { -public: - virtual bool set_condition(const char *str1, const char *str2, char *str); - virtual long strategy(const char *sf, const char *str); -}; - - -/* -- sf ch matchs with 1st ch or ch right after non-alphanum of lf - but at least one match right after non-alphanum - (eg, success: 1-alpha 2-beta (AB), alpha-beta(AB), - fail: alpha beta(AB)) -*/ -class FirstLetGen : public AbbrStra { -public: - virtual long strategy(const char *sf, const char *str); -}; - - -/* -- sf ch matchs with 1st ch or ch right after non-alphanum of lf - (eg, success: 1-alpha 2-beta (AB), alpha-beta(AB), - alpha beta(AB)) -*/ -class FirstLetGen2 : public AbbrStra { -public: - virtual long strategy(const char *sf, const char *str); -}; - - -/* -For sf consisting of capital letters & lower-case 's' -- First letter & 's' in the last token of lf -(success: Alpha Betas (ABs), 1-Alpha Betas (ABs), - 1-Alpha-Betas (ABs), Alpha BetaS (ABs) - fail: Alpha Beta xxs (ABs) ) -*/ -class FirstLetGenS : public AbbrStra { -public: - virtual bool set_condition(const char *sf); //sf must be an original sf - //true if sf is like ABCs - virtual long strategy(const char *sf, const char *str); -}; - - -/* -- sf ch matches with 1st ch or ch right after non-alphanum of lf -- allowing one skip stopword between tokens (no more than one in row) - at least one skip stopword in total - (eg, success: alpha and beta (AB), 1-alpha and beta (AB) - fail: alpha beta (AB), alpha word beta (AB)) -*/ -class FirstLetGenStp : public AbbrStra { -public: - virtual long strategy(const char *sf, const char *str); -}; - - -/* -- same as FirstLetGenStp except for 2 skip stopwords - & at least one two consecutive skip stopwords - */ -class FirstLetGenStp2 : public AbbrStra { -public: - virtual long strategy(const char *sf, const char *str); -}; - - -/* -- same as FirstLetGenStp except using skip any word instead of stopword - */ -class FirstLetGenSkp : public AbbrStra { -public: - virtual long strategy(const char *sf, const char *str); -}; - - -/* -- a matching sub-string must be word - (eg, success: AlphaBeta (AB), Beta is word - x-AlphaBeta (AB) ) -- at least one within word match - (eg,fail: Alpha Beta Word (ABW), Alpha x-Beta x-Word (ABW) - success: AlphaBeta Word (ABW), x-AlphaBeta inWord (ABW)) - */ -class WithinWrdWrd : public AbbrStra { -public: - virtual long strategy(const char *sf, const char *str); -}; - - -/* -- WithinWrdWrd w/ Begin Word Match - (success: AlphaBeta x-Word (ABW) - fail: AlphaBeta inWord (ABW) ) - */ -class WithinWrdFWrd : public AbbrStra { -public: - virtual long strategy(const char *sf, const char *str); -}; - - -/* -- WithinWrdFWrd w/ allowing one skip word between tokens (no more than one in row) - at least one skip word in total - (success: AlphaBeta zzz x-Word zzz (ABW) - fail: AlphaBeta x-Word (ABW), AlphaBeta zzz yyy x-Word (ABW)) -*/ -class WithinWrdFWrdSkp : public AbbrStra { -public: - virtual long strategy(const char *sf, const char *str); -}; - - -/* -- at least one within word match - ( success: Alpha InXyy (AX), x-Alpha InXyy (AX)) - fail: Alpha Xyy (AX), Alpha 1-Xyy (AX)) -*/ -class WithinWrdLet : public AbbrStra { -public: - virtual long strategy(const char *sf, const char *str); -}; - - -/* -- WithinWrdLet w/ Begin Word Match - (fail: Alpha InXyy (AX), x-Alpha InXyy (AX) - success: AlphaXyy Word (AXW), x-AlphaXyy 1-Word (AXW)) -*/ -class WithinWrdFLet : public AbbrStra { -public: - virtual long strategy(const char *sf, const char *str); -}; - - -/* -- WithinWrdFLet w/ allowing one skip word between tokens (no more than one in row) - at least one skip word in total - (success: AlphaXyy zzz Word zzz (AXW) - fail: AlphaXyy Word (AXW), AlphaXyy zzz yyy Word (AXW)) -*/ -class WithinWrdFLetSkp : public AbbrStra { -public: - virtual long strategy(const char *sf, const char *str); -}; - - -/* -- any two consecutive letter matching w/ begin word match -eg) ABxxx (AB), 1-ABxxx (AB), ABxxx Cxxx (ABC), Axxx BCxxx (ABC) - prolactin (PRL), succinylcholine (SCh) -*/ -class ContLet : public AbbrStra { -public: - virtual long strategy(const char *sf, const char *str); -}; - - -/* -- ContLet w/ allowing one skip word between tokens (no more than one in row) - at least one skip word in total -*/ -class ContLetSkp : public AbbrStra { -public: - virtual long strategy(const char *sf, const char *str); -}; - - -/* -- match can occur anywhere -- allow one skip word between tokens (no more than one in row) - (success: Alpha yXyy (AX), Alpha yXyy word (AX) - 1-Alpha yXyy word (AX)) -*/ -class AnyLet : public AbbrStra { -public: - virtual long strategy(const char *sf, const char *str); -}; - - -class StratUtil { -public: - AbbrStra *strat_factory(string name); - vector get_strats(string s); - //get the strategy sequence for a given #-ch SF group - void push_back_strat(string sgp, string strat); - bool group_sf(const char *sf, string &grp); - //check if sf is ok and assign a group - bool group_sf(const char *sf, const char *lf, string &grp); - //add the contion |lf|>|sf| - void remove_nonAlnum(const char *str1, char *str2); - //remove non-alnum in str1 and save it to str2 - long exist_upperal(const char *str); //return 1 if exists upper char, 0 ow - long num_token(const char *str); //return # tokens - - vector Al1, Al2, Al3, Al4, Al5; - vector Num2, Num3, Num4, Num5; - vector Spec2, Spec3, Spec4, Spec5; -}; - - -#endif - - +#ifndef ABBRSTRA_H +#define ABBRSTRA_H + +#include +#include +#include + +using namespace std; +using namespace iret; + + +class WordData { +public: + WordData(const char *wrdname="wrdset3", const char *stpname="stop", + const char *lfsname="Lf1chSf"); + + ~WordData(); + + Chash wrdset; //sigle word in MEDLINE + Hash stp; //stopword + Hash lfs; //lfs (1-ch sf) for FirstLet match cases >=2 +}; + + +class AbbrStra { +public: + AbbrStra(); + ~AbbrStra(); + void token(const char *str, char lst[1000][1000]); // tokennize & set ntk + long tokenize(const char *str, char lst[1000][1000]); //tokennize & return # tokens + long num_token(const char *str); //return # tokens + long first_ch(const char *str, char *fch, long num); + long is_upperal(const char *str); + long is_alpha(const char *str); + void str_tolower(const char *str1, char *str2); + long get_str(const char *str1, char *str2, long num); + bool isupper_str(const char *str); + bool is_onealpha(const char *str); + long count_upperstr(const char *str); + //return # upper-case 1st letter of consecutive tokens (backward) + void get_alpha(const char *str1, char *str2); + //set str2 with only alphabet of str1 + bool lf_ok(const char *shrtf, const char *longf); + + virtual bool set_condition(const char *sf); + //must set nonAlphaSF=true if want to use SF containing non-alphabet + virtual long strategy(const char *sf, const char *str) = 0; + //sf & str will be lower-cased (OCt-25-2007) + long search_backward(long sloc, long tnum, long tloc, const char *sf, bool first); + //search backward to find match starting from sf[sloc] + //Returns 1 if matches. sf[0] must match with begin word + long search_backward_adv(const char *sf, bool first); + //Searches for next model setting. Returns 1 if finds one. + void extract_lf(long begin, long end); + //save strings from begin to end of tok to lf + void extract_lf(long begin, long end, const char *str); + //save strings from begin to end of str's tok to lf + + //---after set mod check conditions + //nsf:# ch in sf, nsw:# allowed skipword, general:true allow 1st ch match after non-alnum + bool exist_skipword(long nsf); + //true if at least one skip word exists + bool exist_n_skipwords(long nsf, long n); + //true if exist n consecutive skip words between tokens but cannot be more than n + bool exist_n_stopwords(long nsf, long n); + //true if exist n consecutive skip stopwords between tokens but cannot be more than n + bool stopword_ok(long nsf, long nsw); + //true if at most (can be 0) nsw skip stopword in row exists + bool skip_stop_ok(long nsf, long nsw, long n); + //true if at most (can be 0) nsw skip word, which include at least n stopwords, in row exists + bool skip_stop_ok2(long nsf, long nsw, long n); + //true if nsw skip word, which include at least n stopwords, in row exists + bool skipword_ok(long nsf, long nsw); + //true if at most (can be 0) nsw skip word in row exists + bool is_subword(long nsf); + //true if matching string is begin of a tok or a word in wrdlist + bool is_BeginWrdMatch(long nsf, bool general); + //true if begining ch of a word match + //if general is true, allow match after non-alnum (eg, 1-alpha) + bool is_WithinWrdMatch(long nsf, bool general); + //true if within word match + //if general is true, 1-Alpha: 'A' is not within word match + bool is_FirstLetMatch(long nsf, bool general); + //true if each ch of sf match with 1st ch of word + //(true: Alpha anyword Beta (AB)) + //if general=true, true: 1-Alpha Beta, Alpha-Beta + bool is_FirstLetMatch2(long nsf, bool general); + //at least one 1-Alpha + bool is_FirstLetSMatch(const char *sf, bool general); + //true if first letter match & 's' match with last ch of lf + bool is_ContLetMatch(long nsf); + //true if two or more consecutive letter match + //--- + + char *pch; //sf applied to a strategy + char *ps, *pl; //sf, potential lf + char sf[100], text[10000]; //sf & potential lf used in a strategy + char lf[10000]; //lf found by a strategy + char tok[1000][1000]; //token of potential lf + //lower after strategy, original after extract_lf(b,e,str) + long ntk; //# tokens + long mod[100][2]; //match locations of tok with a given sf + //mod[sf_inx][0]=tok inx, mod[sf_inx][1]=match loc in tok[mod[sf_inx][0]] + + //for each n_ch-SF + long npairs; //selected pairs for this strategy + long tpairs; //total pairs + long nsfs; //# selected unique sfs for this strategy + long nmatchs; //# matchs (success strategy & given sf == real sf) + long amatchs; //# accumulated matchs up to this strategy + long setCondition; //SF condition + long greaterEqNsf; //if 1 select SF |SF|>=nsf + + WordData *wData; +}; + + +/* +alpha beta gamma (ABG) + */ +class FirstLet : public AbbrStra { +public: + virtual bool set_condition(const char *str1, const char *str2, char *str); + virtual long strategy(const char *sf, const char *str); +}; + + +class FirstLetOneChSF : public AbbrStra { +public: + virtual bool set_condition(const char *str1, const char *str2, char *str); + virtual long strategy(const char *sf, const char *str); +}; + + +/* +- sf ch matchs with 1st ch or ch right after non-alphanum of lf + but at least one match right after non-alphanum + (eg, success: 1-alpha 2-beta (AB), alpha-beta(AB), + fail: alpha beta(AB)) +*/ +class FirstLetGen : public AbbrStra { +public: + virtual long strategy(const char *sf, const char *str); +}; + + +/* +- sf ch matchs with 1st ch or ch right after non-alphanum of lf + (eg, success: 1-alpha 2-beta (AB), alpha-beta(AB), + alpha beta(AB)) +*/ +class FirstLetGen2 : public AbbrStra { +public: + virtual long strategy(const char *sf, const char *str); +}; + + +/* +For sf consisting of capital letters & lower-case 's' +- First letter & 's' in the last token of lf +(success: Alpha Betas (ABs), 1-Alpha Betas (ABs), + 1-Alpha-Betas (ABs), Alpha BetaS (ABs) + fail: Alpha Beta xxs (ABs) ) +*/ +class FirstLetGenS : public AbbrStra { +public: + virtual bool set_condition(const char *sf); //sf must be an original sf + //true if sf is like ABCs + virtual long strategy(const char *sf, const char *str); +}; + + +/* +- sf ch matches with 1st ch or ch right after non-alphanum of lf +- allowing one skip stopword between tokens (no more than one in row) + at least one skip stopword in total + (eg, success: alpha and beta (AB), 1-alpha and beta (AB) + fail: alpha beta (AB), alpha word beta (AB)) +*/ +class FirstLetGenStp : public AbbrStra { +public: + virtual long strategy(const char *sf, const char *str); +}; + + +/* +- same as FirstLetGenStp except for 2 skip stopwords + & at least one two consecutive skip stopwords + */ +class FirstLetGenStp2 : public AbbrStra { +public: + virtual long strategy(const char *sf, const char *str); +}; + + +/* +- same as FirstLetGenStp except using skip any word instead of stopword + */ +class FirstLetGenSkp : public AbbrStra { +public: + virtual long strategy(const char *sf, const char *str); +}; + + +/* +- a matching sub-string must be word + (eg, success: AlphaBeta (AB), Beta is word + x-AlphaBeta (AB) ) +- at least one within word match + (eg,fail: Alpha Beta Word (ABW), Alpha x-Beta x-Word (ABW) + success: AlphaBeta Word (ABW), x-AlphaBeta inWord (ABW)) + */ +class WithinWrdWrd : public AbbrStra { +public: + virtual long strategy(const char *sf, const char *str); +}; + + +/* +- WithinWrdWrd w/ Begin Word Match + (success: AlphaBeta x-Word (ABW) + fail: AlphaBeta inWord (ABW) ) + */ +class WithinWrdFWrd : public AbbrStra { +public: + virtual long strategy(const char *sf, const char *str); +}; + + +/* +- WithinWrdFWrd w/ allowing one skip word between tokens (no more than one in row) + at least one skip word in total + (success: AlphaBeta zzz x-Word zzz (ABW) + fail: AlphaBeta x-Word (ABW), AlphaBeta zzz yyy x-Word (ABW)) +*/ +class WithinWrdFWrdSkp : public AbbrStra { +public: + virtual long strategy(const char *sf, const char *str); +}; + + +/* +- at least one within word match + ( success: Alpha InXyy (AX), x-Alpha InXyy (AX)) + fail: Alpha Xyy (AX), Alpha 1-Xyy (AX)) +*/ +class WithinWrdLet : public AbbrStra { +public: + virtual long strategy(const char *sf, const char *str); +}; + + +/* +- WithinWrdLet w/ Begin Word Match + (fail: Alpha InXyy (AX), x-Alpha InXyy (AX) + success: AlphaXyy Word (AXW), x-AlphaXyy 1-Word (AXW)) +*/ +class WithinWrdFLet : public AbbrStra { +public: + virtual long strategy(const char *sf, const char *str); +}; + + +/* +- WithinWrdFLet w/ allowing one skip word between tokens (no more than one in row) + at least one skip word in total + (success: AlphaXyy zzz Word zzz (AXW) + fail: AlphaXyy Word (AXW), AlphaXyy zzz yyy Word (AXW)) +*/ +class WithinWrdFLetSkp : public AbbrStra { +public: + virtual long strategy(const char *sf, const char *str); +}; + + +/* +- any two consecutive letter matching w/ begin word match +eg) ABxxx (AB), 1-ABxxx (AB), ABxxx Cxxx (ABC), Axxx BCxxx (ABC) + prolactin (PRL), succinylcholine (SCh) +*/ +class ContLet : public AbbrStra { +public: + virtual long strategy(const char *sf, const char *str); +}; + + +/* +- ContLet w/ allowing one skip word between tokens (no more than one in row) + at least one skip word in total +*/ +class ContLetSkp : public AbbrStra { +public: + virtual long strategy(const char *sf, const char *str); +}; + + +/* +- match can occur anywhere +- allow one skip word between tokens (no more than one in row) + (success: Alpha yXyy (AX), Alpha yXyy word (AX) + 1-Alpha yXyy word (AX)) +*/ +class AnyLet : public AbbrStra { +public: + virtual long strategy(const char *sf, const char *str); +}; + + +class StratUtil { +public: + AbbrStra *strat_factory(string name); + vector get_strats(string s); + //get the strategy sequence for a given #-ch SF group + void push_back_strat(string sgp, string strat); + bool group_sf(const char *sf, string &grp); + //check if sf is ok and assign a group + bool group_sf(const char *sf, const char *lf, string &grp); + //add the contion |lf|>|sf| + void remove_nonAlnum(const char *str1, char *str2); + //remove non-alnum in str1 and save it to str2 + long exist_upperal(const char *str); //return 1 if exists upper char, 0 ow + long num_token(const char *str); //return # tokens + + vector Al1, Al2, Al3, Al4, Al5; + vector Num2, Num3, Num4, Num5; + vector Spec2, Spec3, Spec4, Spec5; +}; + + +#endif + + diff --git a/Library/AbbrvE.C b/Library/AbbrvE.C index 0f60036bdcd683baffcd6ab2905974cceb6dc368..d1197c4d79c4c75c6bc013df3ef9347168e48225 100644 --- a/Library/AbbrvE.C +++ b/Library/AbbrvE.C @@ -1,629 +1,629 @@ -#include "AbbrvE.h" -#include - -namespace iret { - - Find_Seq::Find_Seq( void ) - /* initializers work in C++0x - : - seq_i ( { "i", "ii", "iii", "iv", "v", "vi" } ), - seq_I ( { "I", "II", "III", "IV", "V", "VI" } ), - seq_a ( { "a", "b", "c", "d", "e", "f" } ), - seq_A ( { "A", "B", "C", "D", "E", "F" } ) - */ - { - seq_i.push_back("i"); - seq_i.push_back("ii"); - seq_i.push_back("iii"); - seq_i.push_back("iv"); - seq_i.push_back("v"); - seq_i.push_back("vi"); - - seq_I.push_back("I"); - seq_I.push_back("II"); - seq_I.push_back("III"); - seq_I.push_back("IV"); - seq_I.push_back("V"); - seq_I.push_back("VI"); - - seq_a.push_back("a"); - seq_a.push_back("b"); - seq_a.push_back("c"); - seq_a.push_back("d"); - seq_a.push_back("e"); - seq_a.push_back("f"); - - seq_A.push_back("A"); - seq_A.push_back("B"); - seq_A.push_back("C"); - seq_A.push_back("D"); - seq_A.push_back("E"); - seq_A.push_back("F"); - - } - - void - Find_Seq::flag_seq( int numa, char* abbs[] ) { - - my_numa = numa; - my_abbs = abbs; - - my_rate.resize(numa); - for ( int i = 0; i < numa; ++i ) - my_rate[i] = true; - - find_seq(seq_i); - find_seq(seq_I); - find_seq(seq_a); - find_seq(seq_A); - - create_seq(); - } - - - void - Find_Seq::find_seq( const vector & seq ) { - - for ( int i_abbr = 0; i_abbr < my_numa-1; ++i_abbr ) { - // need to see at least 2 in sequence - - if ( seq[0] == my_abbs[i_abbr] ) { - int i_seq = 1; - while ( i_seq < seq.size() and - i_seq + i_abbr < my_numa and - seq[i_seq] == my_abbs[i_abbr + i_seq ] ) - ++i_seq; - - if ( i_seq > 1 ) - for ( int i = 0; i < i_seq; ++i ) - my_rate[i_abbr+i] = false; - } - - } - } - - void - Find_Seq::create_seq( void ) { - - for ( int i_abbr = 0; i_abbr < my_numa; ++i_abbr ) { - - size_t len = std::strlen( my_abbs[i_abbr] ); - - if ( my_abbs[i_abbr][len-1] == '1' ) { - // create sequence and test - - string prefix( my_abbs[i_abbr], len-1 ); - size_t seq_len = my_numa - i_abbr; // max possible length - vector seq; - // sequence starts with 1 - for ( int i= 1; i <= seq_len; ++i ) { - std::ostringstream stream(prefix,std::ios::app); - stream << i; - seq.push_back( stream.str() ); - } - - // cout << seq << '\n'; - find_seq(seq); - } - } - } - - - AbbrvE::AbbrvE(long ta,long wrd_spc){ - tta=ta; - word_space=wrd_spc; - abbl=new char*[tta]; - abbs=new char*[tta]; - nt=new int[tta]; - lst=new char*[word_space]; - numa=num=0; - pMt=new MPtok; - setup_Test(); - } - - AbbrvE::~AbbrvE(){ - if(numa)cleara(); - clear(); - delete [] abbl; - delete [] abbs; - delete [] nt; - delete [] lst; - delete pMt; - } - -void AbbrvE::Extract(char*pch){ - long i,j,k,u,flag; - int ix; - - if ( strlen(pch) <= 0 ) // no text to look at - return; - - token(pch); - - i=j=k=0; - flag=0; - while(ik)&&(strcmp(")",lst[i-1]))){ - j=i; - flag=1; - } - } - if(!strcmp(")",lst[i])){ - if(!flag){j=k=i+1;} - else { - if(((j>k)&&(ij+1)){ - if(k alpha beta ( AB ) - - for(jj=0; jj<2; jj++) {//deal with both () & [] - i=j=k=0; - flag=0; - - if(jj==0) { strcpy(openCh,"("); strcpy(closeCh,")"); } - else if(jj==1) { strcpy(openCh,"["); strcpy(closeCh,"]"); } - - while(ik)&&(strcmp(closeCh,lst[i-1]))){ - j=i; //index of '(' - flag=1; - } - } - if(!strcmp(closeCh,lst[i])){ - if(!flag){j=k=i+1;} //next token - else { - if(((j>k)&&(ij+1)){ - if(kcnam_size) { - cerr<<"Scratch space "<0)&&(isblank(str[u-1])))u--; - str[u]='\0'; - - while(str[j]){ - while(isblank(str[j]))j++; - i=j; - while((str[j])&&(!isblank(str[j])))j++; - lst[k]=new char[j-i+1]; - strncpy(lst[k],str+i,j-i); - lst[k][j-i]='\0'; - if(str[j]){ - k++; - j++; - } - } - num=k+1; -} - - -//both () & [] Jan-9-2008 -//(G(1)) -> ( G(1) ) Jan-28-2008 -void AbbrvE::token2(const char *pch){ - long i=1,j=0,k=0; - long u=1; - vector openChFlag1,openChFlag2; - long cflag; - long ii, jj, kk, sz; - char c,*str=cnam; - clear(); // ready space for tokens - cnam[0]=pch[0]; - while(c=pch[i]){ - switch(c){ - case '(': - //--- (h)alpha -> (h)alpha, (h)-alpha -> ( h ) -alpha - ii=kk=i; - cflag=0; - while(pch[ii] && !isblank(pch[ii])) { //pch[ii] can be '\0' - if(pch[ii]=='(') cflag -= 1; - else if(pch[ii]==')') { cflag += 1; kk=ii; } - ii++; - } - - if(!cflag && isalnum(pch[kk+1])) { //if alnum right after ')' - while(i0 && openChFlag1[sz-1]){ //modified Jan-28-08 - if(!isblank(str[u-1])){ - str[u++]=' '; - str[u++]=pch[i++]; //pch[i++] is ')' - } - //---added (Jan-11-08): (BIV; ), -> ( BIV; ) , - else if(!isblank(pch[i+1])){ - str[u++]=pch[i++]; //pch[i++] is ')' - } - //--- - - if(!isblank(pch[i]))str[u++]=' '; //pch[i] must be after ')' - } - else str[u++]=pch[i++]; - - if(sz>0) openChFlag1.pop_back(); - - break; - - case '[': - //--- [h]alpha -> [h]alpha - ii=kk=i; - cflag=0; - while(pch[ii] && !isblank(pch[ii])) { //pch[ii] can be '\0' - if(pch[ii]=='[') cflag -= 1; - else if(pch[ii]==']') { cflag += 1; kk=ii; } - ii++; - } - - if(!cflag && isalnum(pch[kk+1])) { //if alnum right after ')' - while(i0 && openChFlag2[sz-1]){ //modified Jan-28-08 - if(!isblank(str[u-1])){ - str[u++]=' '; - str[u++]=pch[i++]; - } - //---added (Jan-11-08): [BIV; ], -> [ BIV; ] , - else if(!isblank(pch[i+1])){ - str[u++]=pch[i++]; - } - //--- - if(!isblank(pch[i]))str[u++]=' '; - } - else str[u++]=pch[i++]; - - if(sz>0) openChFlag2.pop_back(); - - break; - default: str[u++]=pch[i++]; - } - } - while((u>0)&&(isblank(str[u-1])))u--; - str[u]='\0'; - - while(str[j]){ - while(isblank(str[j]))j++; - i=j; - while((str[j])&&(!isblank(str[j])))j++; - lst[k]=new char[j-i+1]; - strncpy(lst[k],str+i,j-i); - lst[k][j-i]='\0'; - if(str[j]){ - k++; - j++; - } - } - num=k+1; -} - - - void AbbrvE::clear(void){ - for ( int i=0; i pre.size() and - 0 == pre.compare( 0, pre.size(), str, pre.size() ) ) - return true; - } - return false; - } - - -//no space before and after abbs[] (because of using token) -bool AbbrvE::Test(const char *str){ - - if ( match.find(str) != match.end() ) return false; - if ( prefix_match(str) ) return false; - - size_t length, letters, digits; - length = letters = digits = 0; - - char c; - while((c=str[length])&&(c!=' ')){ - length++; - if ( isdigit(c) ) digits++; - if ( isalpha(c) ) letters++; - - if( length==digits and length>=3 ) return false; - } - if ( digits == length ) return false; - if ( letters <= 0 ) return false; - - return true; -} - - void AbbrvE::setup_Test( void ) { - - match.insert("author's transl"); - match.insert("proceedings"); - match.insert("see"); - match.insert("and"); - match.insert("comment"); - match.insert("letter"); - match.insert("eg"); - - prefix.push_back("="); - prefix.push_back("eg."); - prefix.push_back("eg,"); - prefix.push_back("see "); - prefix.push_back("see,"); - prefix.push_back("p<"); - prefix.push_back("P<"); - - // rules added in 2010 - match.insert("e.g."); - match.insert("ie"); - match.insert("i.e."); - match.insert("mean"); - match.insert("age"); - match.insert("std"); - match.insert("range"); - match.insert("young"); - match.insert("old"); - match.insert("male"); - match.insert("female"); - - } - -void AbbrvE::Proc(char *pxh){ - long i,j; - char *pch,*ptr; - pMt->segment(pxh); - for(i=0;isent.size();i++){ - Extract2( (pMt->sent[i]).c_str() ); - } - - seq.flag_seq( numa, abbs ); - j=0; - for(i=0;i + +namespace iret { + + Find_Seq::Find_Seq( void ) + /* initializers work in C++0x + : + seq_i ( { "i", "ii", "iii", "iv", "v", "vi" } ), + seq_I ( { "I", "II", "III", "IV", "V", "VI" } ), + seq_a ( { "a", "b", "c", "d", "e", "f" } ), + seq_A ( { "A", "B", "C", "D", "E", "F" } ) + */ + { + seq_i.push_back("i"); + seq_i.push_back("ii"); + seq_i.push_back("iii"); + seq_i.push_back("iv"); + seq_i.push_back("v"); + seq_i.push_back("vi"); + + seq_I.push_back("I"); + seq_I.push_back("II"); + seq_I.push_back("III"); + seq_I.push_back("IV"); + seq_I.push_back("V"); + seq_I.push_back("VI"); + + seq_a.push_back("a"); + seq_a.push_back("b"); + seq_a.push_back("c"); + seq_a.push_back("d"); + seq_a.push_back("e"); + seq_a.push_back("f"); + + seq_A.push_back("A"); + seq_A.push_back("B"); + seq_A.push_back("C"); + seq_A.push_back("D"); + seq_A.push_back("E"); + seq_A.push_back("F"); + + } + + void + Find_Seq::flag_seq( int numa, char* abbs[] ) { + + my_numa = numa; + my_abbs = abbs; + + my_rate.resize(numa); + for ( int i = 0; i < numa; ++i ) + my_rate[i] = true; + + find_seq(seq_i); + find_seq(seq_I); + find_seq(seq_a); + find_seq(seq_A); + + create_seq(); + } + + + void + Find_Seq::find_seq( const vector & seq ) { + + for ( int i_abbr = 0; i_abbr < my_numa-1; ++i_abbr ) { + // need to see at least 2 in sequence + + if ( seq[0] == my_abbs[i_abbr] ) { + int i_seq = 1; + while ( i_seq < seq.size() and + i_seq + i_abbr < my_numa and + seq[i_seq] == my_abbs[i_abbr + i_seq ] ) + ++i_seq; + + if ( i_seq > 1 ) + for ( int i = 0; i < i_seq; ++i ) + my_rate[i_abbr+i] = false; + } + + } + } + + void + Find_Seq::create_seq( void ) { + + for ( int i_abbr = 0; i_abbr < my_numa; ++i_abbr ) { + + size_t len = std::strlen( my_abbs[i_abbr] ); + + if ( my_abbs[i_abbr][len-1] == '1' ) { + // create sequence and test + + string prefix( my_abbs[i_abbr], len-1 ); + size_t seq_len = my_numa - i_abbr; // max possible length + vector seq; + // sequence starts with 1 + for ( int i= 1; i <= seq_len; ++i ) { + std::ostringstream stream(prefix,std::ios::app); + stream << i; + seq.push_back( stream.str() ); + } + + // cout << seq << '\n'; + find_seq(seq); + } + } + } + + + AbbrvE::AbbrvE(long ta,long wrd_spc){ + tta=ta; + word_space=wrd_spc; + abbl=new char*[tta]; + abbs=new char*[tta]; + nt=new int[tta]; + lst=new char*[word_space]; + numa=num=0; + pMt=new MPtok; + setup_Test(); + } + + AbbrvE::~AbbrvE(){ + if(numa)cleara(); + clear(); + delete [] abbl; + delete [] abbs; + delete [] nt; + delete [] lst; + delete pMt; + } + +void AbbrvE::Extract(char*pch){ + long i,j,k,u,flag; + int ix; + + if ( strlen(pch) <= 0 ) // no text to look at + return; + + token(pch); + + i=j=k=0; + flag=0; + while(ik)&&(strcmp(")",lst[i-1]))){ + j=i; + flag=1; + } + } + if(!strcmp(")",lst[i])){ + if(!flag){j=k=i+1;} + else { + if(((j>k)&&(ij+1)){ + if(k alpha beta ( AB ) + + for(jj=0; jj<2; jj++) {//deal with both () & [] + i=j=k=0; + flag=0; + + if(jj==0) { strcpy(openCh,"("); strcpy(closeCh,")"); } + else if(jj==1) { strcpy(openCh,"["); strcpy(closeCh,"]"); } + + while(ik)&&(strcmp(closeCh,lst[i-1]))){ + j=i; //index of '(' + flag=1; + } + } + if(!strcmp(closeCh,lst[i])){ + if(!flag){j=k=i+1;} //next token + else { + if(((j>k)&&(ij+1)){ + if(kcnam_size) { + cerr<<"Scratch space "<0)&&(isblank(str[u-1])))u--; + str[u]='\0'; + + while(str[j]){ + while(isblank(str[j]))j++; + i=j; + while((str[j])&&(!isblank(str[j])))j++; + lst[k]=new char[j-i+1]; + strncpy(lst[k],str+i,j-i); + lst[k][j-i]='\0'; + if(str[j]){ + k++; + j++; + } + } + num=k+1; +} + + +//both () & [] Jan-9-2008 +//(G(1)) -> ( G(1) ) Jan-28-2008 +void AbbrvE::token2(const char *pch){ + long i=1,j=0,k=0; + long u=1; + vector openChFlag1,openChFlag2; + long cflag; + long ii, jj, kk, sz; + char c,*str=cnam; + clear(); // ready space for tokens + cnam[0]=pch[0]; + while(c=pch[i]){ + switch(c){ + case '(': + //--- (h)alpha -> (h)alpha, (h)-alpha -> ( h ) -alpha + ii=kk=i; + cflag=0; + while(pch[ii] && !isblank(pch[ii])) { //pch[ii] can be '\0' + if(pch[ii]=='(') cflag -= 1; + else if(pch[ii]==')') { cflag += 1; kk=ii; } + ii++; + } + + if(!cflag && isalnum(pch[kk+1])) { //if alnum right after ')' + while(i0 && openChFlag1[sz-1]){ //modified Jan-28-08 + if(!isblank(str[u-1])){ + str[u++]=' '; + str[u++]=pch[i++]; //pch[i++] is ')' + } + //---added (Jan-11-08): (BIV; ), -> ( BIV; ) , + else if(!isblank(pch[i+1])){ + str[u++]=pch[i++]; //pch[i++] is ')' + } + //--- + + if(!isblank(pch[i]))str[u++]=' '; //pch[i] must be after ')' + } + else str[u++]=pch[i++]; + + if(sz>0) openChFlag1.pop_back(); + + break; + + case '[': + //--- [h]alpha -> [h]alpha + ii=kk=i; + cflag=0; + while(pch[ii] && !isblank(pch[ii])) { //pch[ii] can be '\0' + if(pch[ii]=='[') cflag -= 1; + else if(pch[ii]==']') { cflag += 1; kk=ii; } + ii++; + } + + if(!cflag && isalnum(pch[kk+1])) { //if alnum right after ')' + while(i0 && openChFlag2[sz-1]){ //modified Jan-28-08 + if(!isblank(str[u-1])){ + str[u++]=' '; + str[u++]=pch[i++]; + } + //---added (Jan-11-08): [BIV; ], -> [ BIV; ] , + else if(!isblank(pch[i+1])){ + str[u++]=pch[i++]; + } + //--- + if(!isblank(pch[i]))str[u++]=' '; + } + else str[u++]=pch[i++]; + + if(sz>0) openChFlag2.pop_back(); + + break; + default: str[u++]=pch[i++]; + } + } + while((u>0)&&(isblank(str[u-1])))u--; + str[u]='\0'; + + while(str[j]){ + while(isblank(str[j]))j++; + i=j; + while((str[j])&&(!isblank(str[j])))j++; + lst[k]=new char[j-i+1]; + strncpy(lst[k],str+i,j-i); + lst[k][j-i]='\0'; + if(str[j]){ + k++; + j++; + } + } + num=k+1; +} + + + void AbbrvE::clear(void){ + for ( int i=0; i pre.size() and + 0 == pre.compare( 0, pre.size(), str, pre.size() ) ) + return true; + } + return false; + } + + +//no space before and after abbs[] (because of using token) +bool AbbrvE::Test(const char *str){ + + if ( match.find(str) != match.end() ) return false; + if ( prefix_match(str) ) return false; + + size_t length, letters, digits; + length = letters = digits = 0; + + char c; + while((c=str[length])&&(c!=' ')){ + length++; + if ( isdigit(c) ) digits++; + if ( isalpha(c) ) letters++; + + if( length==digits and length>=3 ) return false; + } + if ( digits == length ) return false; + if ( letters <= 0 ) return false; + + return true; +} + + void AbbrvE::setup_Test( void ) { + + match.insert("author's transl"); + match.insert("proceedings"); + match.insert("see"); + match.insert("and"); + match.insert("comment"); + match.insert("letter"); + match.insert("eg"); + + prefix.push_back("="); + prefix.push_back("eg."); + prefix.push_back("eg,"); + prefix.push_back("see "); + prefix.push_back("see,"); + prefix.push_back("p<"); + prefix.push_back("P<"); + + // rules added in 2010 + match.insert("e.g."); + match.insert("ie"); + match.insert("i.e."); + match.insert("mean"); + match.insert("age"); + match.insert("std"); + match.insert("range"); + match.insert("young"); + match.insert("old"); + match.insert("male"); + match.insert("female"); + + } + +void AbbrvE::Proc(char *pxh){ + long i,j; + char *pch,*ptr; + pMt->segment(pxh); + for(i=0;isent.size();i++){ + Extract2( (pMt->sent[i]).c_str() ); + } + + seq.flag_seq( numa, abbs ); + j=0; + for(i=0;i -#include -#include -#include -#include -using namespace std; -namespace iret { - -typedef vector strings; - - -class Find_Seq { -public: - - Find_Seq( void ); - - // flag the SFs whether part of sequence or not - void flag_seq( int numa, char* abbs[] ); - - // true if good SF, false if part of sequence - bool rate( int i ) const { my_rate[i]; } - -private: - void find_seq( const vector & seq ); - void create_seq( void ); - - // const works with c++0x - /* const */ strings seq_i; - /* const */ strings seq_I; - /* const */ strings seq_a; - /* const */ strings seq_A; - - vector my_rate; - int my_numa; - char ** my_abbs; // really char *[], but that doesn't work - -}; - - -class AbbrvE { - public: - AbbrvE(long ta=10000,long wrd_spc=10000); //Sets space for extracted - //potential abbreviations to ta & word_space to wrd_spc - ~AbbrvE(void); - void Extract(char *pch); //Extracts possible long-short form - //pairs, but does not attempt to find the relationship - void Extract2(const char *pch); //extened version (Jan-9-2008) - bool Test(const char *str); //Tests a single token and returns true - //if the token should be a possible first token of a short form - void Rate(void); //Sets ratings for the proposed pairs. Effort to - //remove (a), (b), etc., sequence markers - void token(const char *str); //Produces a list of tokens in order of - //of occurrence in the string. - void token2(const char *str); //extended version (Jan-9-2008) - void cleara(void); //Clear the abbl & abbs memory of strings - void clear(void); //Clear the lst memory of words - - //Application functions - void Proc(char *pch); //Accepts a natural language statement and - //processes to final results stored in tta, abbs, and abbl - //Need to call cleara function after each use of this function - - // Internal routines: - // setup data for Test method - void setup_Test( void ); - bool prefix_match( const char *str ); // does str begins with a prefix? - - //Data - long tta; //Total possible abbreviations extracted - //default 10k - long numa; //number of abbreviations in current extract - char **abbl; //Long form space, hold up to 10 tokens - char **abbs; //Short form space, hold up to 10 tokens - Find_Seq seq; // identify sequences to ignore - int *nt; //Number of tokens within parentheses - long word_space; //Space in lst for tokens - //default 10k - long num; //Number of tokens - char **lst; //Holds the tokens - - static const int cnam_size=100000; - char cnam[cnam_size]; //Work space - MPtok *pMt; //Pointer at tokenizer class. Used to segment text - //in Proc function. - - // Test data - set match; // bad SF to match exactly - vector prefix; // bad SF to match prefix -}; -} -#endif +#ifndef ABBRVE_H +#define ABBRVE_H +#include +#include +#include +#include +#include +using namespace std; +namespace iret { + +typedef vector strings; + + +class Find_Seq { +public: + + Find_Seq( void ); + + // flag the SFs whether part of sequence or not + void flag_seq( int numa, char* abbs[] ); + + // true if good SF, false if part of sequence + bool rate( int i ) const { my_rate[i]; } + +private: + void find_seq( const vector & seq ); + void create_seq( void ); + + // const works with c++0x + /* const */ strings seq_i; + /* const */ strings seq_I; + /* const */ strings seq_a; + /* const */ strings seq_A; + + vector my_rate; + int my_numa; + char ** my_abbs; // really char *[], but that doesn't work + +}; + + +class AbbrvE { + public: + AbbrvE(long ta=10000,long wrd_spc=10000); //Sets space for extracted + //potential abbreviations to ta & word_space to wrd_spc + ~AbbrvE(void); + void Extract(char *pch); //Extracts possible long-short form + //pairs, but does not attempt to find the relationship + void Extract2(const char *pch); //extened version (Jan-9-2008) + bool Test(const char *str); //Tests a single token and returns true + //if the token should be a possible first token of a short form + void Rate(void); //Sets ratings for the proposed pairs. Effort to + //remove (a), (b), etc., sequence markers + void token(const char *str); //Produces a list of tokens in order of + //of occurrence in the string. + void token2(const char *str); //extended version (Jan-9-2008) + void cleara(void); //Clear the abbl & abbs memory of strings + void clear(void); //Clear the lst memory of words + + //Application functions + void Proc(char *pch); //Accepts a natural language statement and + //processes to final results stored in tta, abbs, and abbl + //Need to call cleara function after each use of this function + + // Internal routines: + // setup data for Test method + void setup_Test( void ); + bool prefix_match( const char *str ); // does str begins with a prefix? + + //Data + long tta; //Total possible abbreviations extracted + //default 10k + long numa; //number of abbreviations in current extract + char **abbl; //Long form space, hold up to 10 tokens + char **abbs; //Short form space, hold up to 10 tokens + Find_Seq seq; // identify sequences to ignore + int *nt; //Number of tokens within parentheses + long word_space; //Space in lst for tokens + //default 10k + long num; //Number of tokens + char **lst; //Holds the tokens + + static const int cnam_size=100000; + char cnam[cnam_size]; //Work space + MPtok *pMt; //Pointer at tokenizer class. Used to segment text + //in Proc function. + + // Test data + set match; // bad SF to match exactly + vector prefix; // bad SF to match prefix +}; +} +#endif diff --git a/Library/Btree.C b/Library/Btree.C index 64f01624b798f279552d0573c54785a0025274a3..2e52a712234a121f5da69b0a575176d9f7e0e5c7 100644 --- a/Library/Btree.C +++ b/Library/Btree.C @@ -1,1304 +1,1304 @@ -#include -#include -#include -#include -#include -#include -#include -#include "Btree.h" -#include "runn.h" - -using namespace std; -namespace iret { - -Node::Node(void){ - str=NULL; - rel=NULL; - pdn=NULL; -} - -Node::Node(const char *ptr){ - int i=strlen(ptr); - str = new char[i+1]; - strcpy(str,ptr); - rel=NULL; - pdn=NULL; -} - -Node::Node(char const *ptr,void *dtr){ - int i=strlen(ptr); - str = new char[i+1]; - strcpy(str,ptr); - rel = dtr; - pdn=NULL; -} - -Node::~Node(){ - if(str)delete [] str; -} - -void Node::debug(void){ - cout << "Node {" << endl; - cout << " str: " << this->str << endl; - if(rel==NULL)cout << " rel: NULL" << endl; - else cout << " rel: " << (long)rel << endl; - if(pdn==NULL)cout << " pdn: NULL" << endl; - else cout << " pdn: " << (long)pdn << endl; - cout << " }" << endl; -} - -Page::Page(){ - pdn=NULL; - ndnm='\0'; -} - -Page::Page(Page *const pz,Page *const pn,const int n){ - pdn=pn; - int j=(int)(pz->ndnm)-n; - ndnm=(char)(j>0 ? j : 0); - for(int i=0;i<(int)ndnm;i++){pnd[i]=(pz->pnd)[n+i];} -} - -Page::~Page(){ - for(int i=0;i<(int)ndnm;i++){ - delete pnd[i]; - } -} - -void Page::clean(void){ - for(int i=0;i<(int)ndnm;i++){ - pnd[i]->str=NULL; - } -} - -void Page::insert(const int n,Node * const nd,const int j){ - assert(jn;i--)pnd[i]=pnd[i-1]; - pnd[n]=nd; - } - ndnm++; -} - -int Page::search(int &a,int &b,const char *str,int &p){ - int j; - if((j=stc_my(a,b,str,pnd[0]->str))<0){ - p=0; - return(0); - } - else if(j==0){ - p=0; - return(1); - } - if((j=stc_my(a,b,str,pnd[(int)(ndnm-1)]->str))>0){ - p=(int)ndnm; - return(0); - } - else if(j==0){ - p=(int)(ndnm-1); - return(1); - } - int x=0,i; - int y=(int)(ndnm-1); - while(y-x>1){ - i=(y+x)/2; - if((j=stc_my(a,b,str,pnd[i]->str))==0){p=i;return(1);} - else if(j<0)y=i; - else x=i; - } - p=y; - return(0); -} - -int Page::search(int &a,int &b,char *str,int &p,Partial_match *btr){ - int j; - if((j=btr->stc_my_long(a,b,str,pnd[0]->str,0))<0){ - p=0; - return(0); - } - else if(j==0){ - p=0; - return(1); - } - if((j=btr->stc_my_long(a,b,str,pnd[(int)(ndnm-1)]->str,(int)(ndnm-1)))>0){ - p=(int)ndnm; - return(0); - } - else if(j==0){ - p=(int)(ndnm-1); - return(1); - } - int x=0,i; - int y=(int)(ndnm-1); - while(y-x>1){ - i=(y+x)/2; - if((j=btr->stc_my_long(a,b,str,pnd[i]->str,i))==0){p=i;return(1);} - else if(j<0)y=i; - else x=i; - } - p=y; - return(0); -} - -void Page::debug(void){ - cout << "Page {" << endl; - cout << " ndnm: " << (int)ndnm << endl; - if(pdn==NULL)cout << " pdn: NULL" << endl; - else cout << " pdn: " << (long)pdn << endl; - for(int i=0;i<(int)ndnm;i++){ - cout << i << " "; - (this->pnd[i])->debug(); - } - cout << " }" << endl; -} - -int stc_my(int &a,int &b,const char *str,const char *ptr) - {register int i=(andnm = 1; - (root->pnd)[0]=new Node(""); -} - -int Btree::search(const char *str){ - depth=-1; - Page *pu=root; - register int a=0,b=0,i,j; - while(pu!=NULL){ - depth++; - pg[depth]=pu; - j=(pu->search)(a,b,str,i); - cnd[depth]=i; - if(j==1)return(1); - if(i==0)pu=pu->pdn; - else pu=(pu->pnd)[i-1]->pdn; - } -return(0); -} - -int Btree::insert(Node *nd){ - int w,k; - Page *pm,*pz; - while((nd!=NULL)&&(depth)){ - pm=pg[depth]; - w=pm->ndnm; - if(winsert(cnd[depth],nd,w); - nd=NULL; - } - else { - k=cnd[depth]; - if(kpnd)[order-1])->pdn,order); - pm->insert(k,nd,order); - nd=pm->pnd[order]; - nd->pdn=pz; - pm->ndnm=order; - } - else if(k>order){ - pz=new Page(pm,((pm->pnd)[order])->pdn,order+1); - pz->insert(k-order-1,nd,order-1); - nd=pm->pnd[order]; - nd->pdn=pz; - pm->ndnm=order; - } - else { - pz=new Page(pm,nd->pdn,order); - nd->pdn=pz; - pm->ndnm=order; - } - } - depth--; - } - if(nd!=NULL){ - pm=pg[depth]; - w=pm->ndnm; - if(winsert(cnd[depth],nd,w); - else { - root=new Page(); - root->pdn=pm; - k=cnd[depth]; - if(kpnd)[order-1])->pdn,order); - pm->insert(k,nd,order); - (root->pnd)[0]=pm->pnd[order]; - ((root->pnd)[0])->pdn=pz; - root->ndnm=1; - pm->ndnm=order; - } - else if(k>order){ - pz=new Page(pm,((pm->pnd)[order])->pdn,order+1); - pz->insert(k-order-1,nd,order-1); - (root->pnd)[0]=pm->pnd[order]; - ((root->pnd)[0])->pdn=pz; - root->ndnm=1; - pm->ndnm=order; - } - else { - pz=new Page(pm,nd->pdn,order); - (root->pnd)[0]=nd; - nd->pdn=pz; - root->ndnm=1; - pm->ndnm=order; - } - } - } -return(1); -} - -void Btree::node_first(void){ - depth=0; - pg[depth]=root; - cnd[depth]=0; - Page *pm; - while((pm=(pg[depth]->pdn))!=NULL){ - depth++; - pg[depth]=pm; - cnd[depth]=0; - } -} - -int Btree::node_next(){ - int i=cnd[depth]; - Page *pd=((pg[depth]->pnd)[i])->pdn; - if(pd!=NULL){ - (cnd[depth])++; - depth++; - pg[depth]=pd; - cnd[depth]=0; - while((pd=(pg[depth]->pdn))!=NULL){ - depth++; - pg[depth]=pd; - cnd[depth]=0; - } - } - else { - cnd[depth]=++i; - while((depth>=1)&&(i==(pg[depth]->ndnm))){depth--;i=cnd[depth];} - if((depth==0)&&(i==(pg[depth]->ndnm)))depth--; - if(depth<0)return(0); - } -return(1); -} - -char *Btree::show_str(){ - return(((pg[depth]->pnd)[cnd[depth]])->str); -} - -void *Btree::give_ptr(){ - return(((pg[depth]->pnd)[cnd[depth]])->rel); -} - -void Btree::set_ptr(void *dtr){ - ((pg[depth]->pnd)[cnd[depth]])->rel=dtr; -} - -Btree::~Btree(){ - int pflag=get_qflag(); - long k=0; - if (copy) return; // only delete original - if(!iclean){ - node_first(); - int i=depth,j; - do{ - j=node_next(); - if(depthdepth){ - delete pg[i]; - i--; - mark(pflag,++k,1000,"pages deleted"); - } - } - else i=depth; - } while(j); - } - else { - node_first(); - int i=depth,j; - do{ - j=node_next(); - if(depthdepth){ - pg[i]->clean(); - delete pg[i]; - i--; - mark(pflag,++k,1000,"pages deleted"); - } - } - else i=depth; - } while(j); - } -} - -long Btree::list_write(ofstream &fout){ - int pflag=get_qflag(); - long ct=0; - node_first(); - while(node_next()){ - fout << show_str() << endl; - mark(pflag,++ct,1000,"strings written"); - } - fout.close(); - return((int)fout.good()); -} - -Btree::Btree(ifstream &fin){ - copy=false; - char cnam[256]; - int pflag=get_qflag(); - depth=0; - pg[0]=root=new Page(); - cnd[0]=root->ndnm = 1; - (root->pnd)[0]=new Node(""); - Node *pno; - long ct=0; - while(get_string(cnam,fin,'\n')){ - pno = new Node(cnam); - add(pno); - mark(pflag,++ct,10000,"strings read"); - } - fin.close(); -} - -int Btree::add(Node *nd){ - int w,k,dp; - Page *pm,*pz; - dp=depth; //uses dp in place of depth in insert. - while((nd!=NULL)&&(dp)){ - pm=pg[dp]; - w=pm->ndnm; - if(winsert(cnd[dp],nd,w); - nd=NULL; - (cnd[dp])++; //variation from insert. - } - else { - k=cnd[dp]; - if(kpnd)[order-1])->pdn,order); - pm->insert(k,nd,order); - nd=pm->pnd[order]; - nd->pdn=pz; - pm->ndnm=order; - } - else if(k>order){ - pz=new Page(pm,((pm->pnd)[order])->pdn,order+1); - pz->insert(k-order-1,nd,order-1); - nd=pm->pnd[order]; - nd->pdn=pz; - pm->ndnm=order; - } - else { - pz=new Page(pm,nd->pdn,order); - nd->pdn=pz; - pm->ndnm=order; - } - pg[dp]=pz; //2 lines of variation from insert. - cnd[dp]=order; - } - dp--; - } - if(nd!=NULL){ - pm=pg[dp]; - w=pm->ndnm; - if(winsert(cnd[dp],nd,w); - (cnd[dp])++; //variation from insert. - } - else { - root=new Page(); - root->pdn=pm; - k=cnd[dp]; - if(kpnd)[order-1])->pdn,order); - pm->insert(k,nd,order); - (root->pnd)[0]=pm->pnd[order]; - ((root->pnd)[0])->pdn=pz; - root->ndnm=1; - pm->ndnm=order; - } - else if(k>order){ - pz=new Page(pm,((pm->pnd)[order])->pdn,order+1); - pz->insert(k-order-1,nd,order-1); - (root->pnd)[0]=pm->pnd[order]; - ((root->pnd)[0])->pdn=pz; - root->ndnm=1; - pm->ndnm=order; - } - else { - pz=new Page(pm,nd->pdn,order); - (root->pnd)[0]=nd; - nd->pdn=pz; - root->ndnm=1; - pm->ndnm=order; - } - next_empty(); //variation from insert. - } - } -return(1); -} - -void Btree::next_empty(){ - depth=0; - pg[depth]=root; - int i=cnd[depth]=root->ndnm; - Page *pm; - while((pm=((pg[depth]->pnd)[i-1])->pdn)!=NULL){ - depth++; - pg[depth]=pm; - i=cnd[depth]=pm->ndnm; - } -} - -Str_str::Str_str() : Btree() { -} - -Str_str::~Str_str(){ - if(copy)return; - this->node_first(); - while(this->node_next())delete [] (char*)this->give_ptr(); -} - -void Str_str::add_pair(const char *one,const char *two){ - Node *pnd; - if(search(one)){ - cout << "Duplicate string in keys list = " << one << endl; - exit(0); - } - else { - int i=strlen(two); - char *st=new char[i+1]; - strcpy(st,two); - pnd=new Node(one,(void *)st); - add(pnd); - } -} - -char *Str_str::match(const char *one){ - if(search(one)){ - return((char*)give_ptr()); - } - else { - cout << "String not a key = " << one << endl; - exit(0); - } -} - -List::List() : Btree() { - cnt_key=0; -} - -List::~List(){ -} - -void List::add_key(const char *str){ - Node *pnd; - if(!search(str)){ - pnd=new Node(str); - add(pnd); - } -} - -void List::add_key_count(const char *str){ - Node *pnd; - if(!search(str)){ - pnd=new Node(str); - add(pnd); - cnt_key++; - } -} - -void List::addp_key_count(char *str){ - Node *pnd; - if(!search(str)){ - pnd=new Node; - pnd->str=str; - add(pnd); - cnt_key++; - } -} - -Num_num::Num_num() : Btree() { -} - -Num_num::~Num_num(){ - if(copy)return; - this->node_first(); - while(this->node_next())delete (long*)this->give_ptr(); -} - -void Num_num::add_pair(long i,long j){ - Node *pnd; - char cnam[256]; - long_str(cnam,i); - if(!search(cnam)){ - long *st=new long; - *st=j; - pnd=new Node(cnam,(void *)st); - add(pnd); - } -} - -long Num_num::match(long i){ - char cnam[256]; - long_str(cnam,i); - if(search(cnam)){ - return(*((long*)give_ptr())); - } - else return(LNEG); -} - -Count::Count() : List() { - total=0; -} - -Count::~Count(){ - if(copy)return; - long *pk; - this->node_first(); - while(this->node_next()){ - pk=(long*)(this->give_ptr()); - if(pk)delete pk; - } -} - -void Count::add_count(const char *pch,long n){ - long *ppt; - Node *np; - total+=n; - if(this->search(pch)==0){ - ppt = new long; - (*ppt) =n; - np=new Node(pch,(void*)ppt); - this->insert(np); - } - else { - (*(long*) this->give_ptr())+=n; - } -} - -void Count::add_countz(const char *pch,long n){ - long *ppt; - Node *np; - if(this->search(pch)==0){ - ppt = new long; - (*ppt) =n; - np=new Node(pch,(void*)ppt); - this->insert(np); - cnt_key++; - } - else { - (*(long*) this->give_ptr())+=n; - } -} - -void Count::add_count2(const char *pch,long n){ - long *ppt; - Node *np; - total+=n; - if(this->search(pch)==0){ - ppt = new long; - (*ppt) =n; - np=new Node(pch,(void*)ppt); - this->insert(np); - cnt_key++; - } - else { - (*(long*) this->give_ptr())+=n; - } -} - -void Count::addp_count2(char *pch,long n){ - long *ppt; - Node *np; - total+=n; - if(this->search(pch)==0){ - ppt = new long; - (*ppt) =n; - np=new Node; - np->str=pch; - np->rel=ppt; - this->insert(np); - cnt_key++; - } - else { - (*(long*) this->give_ptr())+=n; - } -} - -void Count::correct(const char *pch,long n){ - if(this->search(pch)){ - (*(long*) this->give_ptr())=n; - } -} - -long Count::count(const char *pch){ - if(this->search(pch)==0){ - return(0); - } - else { - return(*((long*) this->give_ptr())); - } -} - -long Count::count(void){ - return(*((long*) this->give_ptr())); -} - -void Count::max_count(const char *pch,long n){ - long *ppt,i; - Node *np; - total+=n; - if(!search(pch)){ - ppt = new long; - (*ppt) =n; - np=new Node(pch,(void*)ppt); - this->insert(np); - } - else { - ppt=(long*)give_ptr(); - if(*pptinsert(np); - cnt_key++; - } - else { - ppt=(long*)give_ptr(); - if(*pptstr=pch; - np->rel=ppt; - this->insert(np); - cnt_key++; - } - else { - ppt=(long*)give_ptr(); - if(*pptinsert(np); - } - else { - ppt=(long*)give_ptr(); - if(*ppt>n)*ppt=n; - } -} - -void Count::min_count2(const char *pch,long n){ - long *ppt,i; - Node *np; - total+=n; - if(!search(pch)){ - ppt = new long; - (*ppt) =n; - np=new Node(pch,(void*)ppt); - this->insert(np); - cnt_key++; - } - else { - ppt=(long*)give_ptr(); - if(*ppt>n)*ppt=n; - } -} - -void Count::minp_count2(char *pch,long n){ - long *ppt,i; - Node *np; - total+=n; - if(!search(pch)){ - ppt = new long; - (*ppt) =n; - np=new Node; - np->str=pch; - np->rel=ppt; - this->insert(np); - cnt_key++; - } - else { - ppt=(long*)give_ptr(); - if(*ppt>n)*ppt=n; - } -} - -//FCount (float count tree) - -FCount::FCount() : List() { - total=0; -} - -FCount::~FCount(){ - if(copy)return; - float *pk; - this->node_first(); - while(this->node_next()){ - pk=(float*)(this->give_ptr()); - if(pk)delete pk; - } -} - -void FCount::Copy(FCount &Fc){ - char *pch; - float *xx,*zz; - Node *pN; - - pg[0]=root; - cnd[0]=root->ndnm; - - Fc.node_first(); - while(Fc.node_next()){ - pch=Fc.show_str(); - xx=(float*)Fc.give_ptr(); - zz=new float; - *zz=*xx; - pN=new Node(pch,(void*)zz); - add(pN); - } -} - -void FCount::add_count(const char *pch,float z){ - float *ppt; - Node *np; - total+=z; - if(this->search(pch)==0){ - ppt = new float; - (*ppt) =z; - np=new Node(pch,(void*)ppt); - this->insert(np); - } - else { - (*(float*) this->give_ptr())+=z; - } -} - -void FCount::add_count2(const char *pch,float z){ - float *ppt; - Node *np; - total+=z; - if(this->search(pch)==0){ - ppt = new float; - (*ppt) =z; - np=new Node(pch,(void*)ppt); - this->insert(np); - cnt_key++; - } - else { - (*(float*) this->give_ptr())+=z; - } -} - -void FCount::addp_count2(char *pch,float z){ - float *ppt; - Node *np; - total+=z; - if(this->search(pch)==0){ - ppt = new float; - (*ppt) =z; - np=new Node; - np->str=pch; - np->rel=ppt; - this->insert(np); - cnt_key++; - } - else { - (*(float*) this->give_ptr())+=z; - } -} - -float FCount::count(const char *pch){ - if(this->search(pch)==0){ - return(0); - } - else { - return(*((float*) this->give_ptr())); - } -} - -float FCount::count(void){ - return(*((float*) this->give_ptr())); -} - -//DCount (double precision count tree) - -DCount::DCount() : List() { - total=0; -} - -DCount::~DCount(){ - if(copy)return; - double *pk; - this->node_first(); - while(this->node_next()){ - pk=(double*)(this->give_ptr()); - if(pk)delete pk; - } -} - -void DCount::Copy(DCount &Dc){ - char *pch; - double *xx,*zz; - Node *pN; - - pg[0]=root; - cnd[0]=root->ndnm; - - Dc.node_first(); - while(Dc.node_next()){ - pch=Dc.show_str(); - xx=(double*)Dc.give_ptr(); - zz=new double; - *zz=*xx; - pN=new Node(pch,(void*)zz); - add(pN); - } -} - -void DCount::add_count(const char *pch,double z){ - double *ppt; - Node *np; - total+=z; - if(this->search(pch)==0){ - ppt = new double; - (*ppt) =z; - np=new Node(pch,(void*)ppt); - this->insert(np); - } - else { - (*(double*) this->give_ptr())+=z; - } -} - -void DCount::add_count2(const char *pch,double z){ - double *ppt; - Node *np; - total+=z; - if(this->search(pch)==0){ - ppt = new double; - (*ppt) =z; - np=new Node(pch,(void*)ppt); - this->insert(np); - cnt_key++; - } - else { - (*(double*) this->give_ptr())+=z; - } -} - -void DCount::addp_count2(char *pch,double z){ - double *ppt; - Node *np; - total+=z; - if(this->search(pch)==0){ - ppt = new double; - (*ppt) =z; - np=new Node; - np->str=pch; - np->rel=ppt; - this->insert(np); - cnt_key++; - } - else { - (*(double*) this->give_ptr())+=z; - } -} - -double DCount::count(const char *pch){ - if(this->search(pch)==0){ - return(0); - } - else { - return(*((double*) this->give_ptr())); - } -} - -double DCount::count(void){ - return(*((double*) this->give_ptr())); -} - -void DCount::max_count(const char *pch,double z){ - double *ppt; - Node *np; - total+=z; - if(!search(pch)){ - ppt = new double; - (*ppt) =z; - np=new Node(pch,(void*)ppt); - this->insert(np); - } - else { - ppt=(double*)give_ptr(); - if(*pptinsert(np); - cnt_key++; - } - else { - ppt=(double*)give_ptr(); - if(*pptstr=pch; - np->rel=ppt; - this->insert(np); - cnt_key++; - } - else { - ppt=(double*)give_ptr(); - if(*pptinsert(np); - } - else { - ppt=(double*)give_ptr(); - if(*ppt>z)*ppt=z; - } -} - -void DCount::min_count2(const char *pch,double z){ - double *ppt; - Node *np; - total+=z; - if(!search(pch)){ - ppt = new double; - (*ppt) =z; - np=new Node(pch,(void*)ppt); - this->insert(np); - cnt_key++; - } - else { - ppt=(double*)give_ptr(); - if(*ppt>z)*ppt=z; - } -} - -void DCount::minp_count2(char *pch,double z){ - double *ppt; - Node *np; - total+=z; - if(!search(pch)){ - ppt = new double; - (*ppt) =z; - np=new Node; - np->str=pch; - np->rel=ppt; - this->insert(np); - cnt_key++; - } - else { - ppt=(double*)give_ptr(); - if(*ppt>z)*ppt=z; - } -} - -void DCount::debug(void){ - node_first(); - while(node_next()){ - cout << count() << " " << show_str() << endl; - } -} - -//Partial Match - -Partial_match::Partial_match() : Count() { -} - -Partial_match::~Partial_match(){ -} - -void Partial_match::long_match(char *str,List &Lst){ - char *pch; - while(*str!='\0'){ - if(this->search_long(str)){ - pch=this->show_str(); - Lst.add_key_count(pch); - } - if((pch=strchr(str,' '))!=NULL)str=pch+1; - else str=str+strlen(str); - } -} - -void Partial_match::local_match(char *str,List &Lst){ - char *pch; - int i,j; - if(*str!='\0'){ - if(this->search_long(str)){ - pch=this->show_str(); - Lst.add_key_count(pch); - i=strlen(pch)-1; - while(0search(str); - *(str+i)=' '; - if(j){ - pch=this->show_str(); - Lst.add_key_count(pch); - } - i--; - } - } - } - } -} - -void Partial_match::all_match(char *str,List &Lst){ - char *pch; - int i,j; - while(*str!='\0'){ - if(this->search_long(str)){ - pch=this->show_str(); - Lst.add_key_count(pch); - i=strlen(pch)-1; - while(0search(str); - *(str+i)=' '; - if(j){ - pch=this->show_str(); - Lst.add_key_count(pch); - } - i--; - } - } - } - if((pch=strchr(str,' '))!=NULL)str=pch+1; - else str=str+strlen(str); - } -} - -void Partial_match::long_match(char *str,Count &Cnt,long n){ - char *pch; - while(*str!='\0'){ - if(this->search_long(str)){ - pch=this->show_str(); - Cnt.add_count2(pch,n); - } - if((pch=strchr(str,' '))!=NULL)str=pch+1; - else str=str+strlen(str); - } -} - -void Partial_match::local_match(char *str,Count &Cnt,long n){ - char *pch; - int i,j; - if(*str!='\0'){ - if(this->search_long(str)){ - pch=this->show_str(); - Cnt.add_count2(pch,n); - i=strlen(pch)-1; - while(0search(str); - *(str+i)=' '; - if(j){ - pch=this->show_str(); - Cnt.add_count2(pch,n); - } - i--; - } - } - } - } -} - -void Partial_match::all_match(char *str,Count &Cnt,long n){ - char *pch; - int i,j; - while(*str!='\0'){ - if(this->search_long(str)){ - pch=this->show_str(); - Cnt.add_count2(pch,n); - i=strlen(pch)-1; - while(0search(str); - *(str+i)=' '; - if(j){ - pch=this->show_str(); - Cnt.add_count2(pch,n); - } - i--; - } - } - } - if((pch=strchr(str,' '))!=NULL)str=pch+1; - else str=str+strlen(str); - } -} - -int Partial_match::search_long(char *str){ - int a=0,b=0,i,j; - len=strlen(str); - if(this->step_one(a,b,str))return(1); - i=(asearch(str); - *(str+i)=' '; - if(j)return(1); - i--; - } - } - if(cln_o){ - depth=depth_o; - cnd[depth]=index_o; - return(1); - } - else return(0); -} - -int Partial_match::step_one(int &a,int &b,char *str){ - char c; - cln_o=0; - cln=0; - while((c=*(str+cln))&&c!=32)cln++; - *(str+cln)='\0'; - depth=-1; - Page *pu=root; - int i,j; - while(pu!=NULL){ - depth++; - pg[depth]=pu; - j=(pu->search)(a,b,str,i,this); - cnd[depth]=i; - if(j==1)return(1); - if(i==0)pu=pu->pdn; - else pu=(pu->pnd)[i-1]->pdn; - } - -if(cln +#include +#include +#include +#include +#include +#include +#include "Btree.h" +#include "runn.h" + +using namespace std; +namespace iret { + +Node::Node(void){ + str=NULL; + rel=NULL; + pdn=NULL; +} + +Node::Node(const char *ptr){ + int i=strlen(ptr); + str = new char[i+1]; + strcpy(str,ptr); + rel=NULL; + pdn=NULL; +} + +Node::Node(char const *ptr,void *dtr){ + int i=strlen(ptr); + str = new char[i+1]; + strcpy(str,ptr); + rel = dtr; + pdn=NULL; +} + +Node::~Node(){ + if(str)delete [] str; +} + +void Node::debug(void){ + cout << "Node {" << endl; + cout << " str: " << this->str << endl; + if(rel==NULL)cout << " rel: NULL" << endl; + else cout << " rel: " << (long)rel << endl; + if(pdn==NULL)cout << " pdn: NULL" << endl; + else cout << " pdn: " << (long)pdn << endl; + cout << " }" << endl; +} + +Page::Page(){ + pdn=NULL; + ndnm='\0'; +} + +Page::Page(Page *const pz,Page *const pn,const int n){ + pdn=pn; + int j=(int)(pz->ndnm)-n; + ndnm=(char)(j>0 ? j : 0); + for(int i=0;i<(int)ndnm;i++){pnd[i]=(pz->pnd)[n+i];} +} + +Page::~Page(){ + for(int i=0;i<(int)ndnm;i++){ + delete pnd[i]; + } +} + +void Page::clean(void){ + for(int i=0;i<(int)ndnm;i++){ + pnd[i]->str=NULL; + } +} + +void Page::insert(const int n,Node * const nd,const int j){ + assert(jn;i--)pnd[i]=pnd[i-1]; + pnd[n]=nd; + } + ndnm++; +} + +int Page::search(int &a,int &b,const char *str,int &p){ + int j; + if((j=stc_my(a,b,str,pnd[0]->str))<0){ + p=0; + return(0); + } + else if(j==0){ + p=0; + return(1); + } + if((j=stc_my(a,b,str,pnd[(int)(ndnm-1)]->str))>0){ + p=(int)ndnm; + return(0); + } + else if(j==0){ + p=(int)(ndnm-1); + return(1); + } + int x=0,i; + int y=(int)(ndnm-1); + while(y-x>1){ + i=(y+x)/2; + if((j=stc_my(a,b,str,pnd[i]->str))==0){p=i;return(1);} + else if(j<0)y=i; + else x=i; + } + p=y; + return(0); +} + +int Page::search(int &a,int &b,char *str,int &p,Partial_match *btr){ + int j; + if((j=btr->stc_my_long(a,b,str,pnd[0]->str,0))<0){ + p=0; + return(0); + } + else if(j==0){ + p=0; + return(1); + } + if((j=btr->stc_my_long(a,b,str,pnd[(int)(ndnm-1)]->str,(int)(ndnm-1)))>0){ + p=(int)ndnm; + return(0); + } + else if(j==0){ + p=(int)(ndnm-1); + return(1); + } + int x=0,i; + int y=(int)(ndnm-1); + while(y-x>1){ + i=(y+x)/2; + if((j=btr->stc_my_long(a,b,str,pnd[i]->str,i))==0){p=i;return(1);} + else if(j<0)y=i; + else x=i; + } + p=y; + return(0); +} + +void Page::debug(void){ + cout << "Page {" << endl; + cout << " ndnm: " << (int)ndnm << endl; + if(pdn==NULL)cout << " pdn: NULL" << endl; + else cout << " pdn: " << (long)pdn << endl; + for(int i=0;i<(int)ndnm;i++){ + cout << i << " "; + (this->pnd[i])->debug(); + } + cout << " }" << endl; +} + +int stc_my(int &a,int &b,const char *str,const char *ptr) + {register int i=(andnm = 1; + (root->pnd)[0]=new Node(""); +} + +int Btree::search(const char *str){ + depth=-1; + Page *pu=root; + register int a=0,b=0,i,j; + while(pu!=NULL){ + depth++; + pg[depth]=pu; + j=(pu->search)(a,b,str,i); + cnd[depth]=i; + if(j==1)return(1); + if(i==0)pu=pu->pdn; + else pu=(pu->pnd)[i-1]->pdn; + } +return(0); +} + +int Btree::insert(Node *nd){ + int w,k; + Page *pm,*pz; + while((nd!=NULL)&&(depth)){ + pm=pg[depth]; + w=pm->ndnm; + if(winsert(cnd[depth],nd,w); + nd=NULL; + } + else { + k=cnd[depth]; + if(kpnd)[order-1])->pdn,order); + pm->insert(k,nd,order); + nd=pm->pnd[order]; + nd->pdn=pz; + pm->ndnm=order; + } + else if(k>order){ + pz=new Page(pm,((pm->pnd)[order])->pdn,order+1); + pz->insert(k-order-1,nd,order-1); + nd=pm->pnd[order]; + nd->pdn=pz; + pm->ndnm=order; + } + else { + pz=new Page(pm,nd->pdn,order); + nd->pdn=pz; + pm->ndnm=order; + } + } + depth--; + } + if(nd!=NULL){ + pm=pg[depth]; + w=pm->ndnm; + if(winsert(cnd[depth],nd,w); + else { + root=new Page(); + root->pdn=pm; + k=cnd[depth]; + if(kpnd)[order-1])->pdn,order); + pm->insert(k,nd,order); + (root->pnd)[0]=pm->pnd[order]; + ((root->pnd)[0])->pdn=pz; + root->ndnm=1; + pm->ndnm=order; + } + else if(k>order){ + pz=new Page(pm,((pm->pnd)[order])->pdn,order+1); + pz->insert(k-order-1,nd,order-1); + (root->pnd)[0]=pm->pnd[order]; + ((root->pnd)[0])->pdn=pz; + root->ndnm=1; + pm->ndnm=order; + } + else { + pz=new Page(pm,nd->pdn,order); + (root->pnd)[0]=nd; + nd->pdn=pz; + root->ndnm=1; + pm->ndnm=order; + } + } + } +return(1); +} + +void Btree::node_first(void){ + depth=0; + pg[depth]=root; + cnd[depth]=0; + Page *pm; + while((pm=(pg[depth]->pdn))!=NULL){ + depth++; + pg[depth]=pm; + cnd[depth]=0; + } +} + +int Btree::node_next(){ + int i=cnd[depth]; + Page *pd=((pg[depth]->pnd)[i])->pdn; + if(pd!=NULL){ + (cnd[depth])++; + depth++; + pg[depth]=pd; + cnd[depth]=0; + while((pd=(pg[depth]->pdn))!=NULL){ + depth++; + pg[depth]=pd; + cnd[depth]=0; + } + } + else { + cnd[depth]=++i; + while((depth>=1)&&(i==(pg[depth]->ndnm))){depth--;i=cnd[depth];} + if((depth==0)&&(i==(pg[depth]->ndnm)))depth--; + if(depth<0)return(0); + } +return(1); +} + +char *Btree::show_str(){ + return(((pg[depth]->pnd)[cnd[depth]])->str); +} + +void *Btree::give_ptr(){ + return(((pg[depth]->pnd)[cnd[depth]])->rel); +} + +void Btree::set_ptr(void *dtr){ + ((pg[depth]->pnd)[cnd[depth]])->rel=dtr; +} + +Btree::~Btree(){ + int pflag=get_qflag(); + long k=0; + if (copy) return; // only delete original + if(!iclean){ + node_first(); + int i=depth,j; + do{ + j=node_next(); + if(depthdepth){ + delete pg[i]; + i--; + mark(pflag,++k,1000,"pages deleted"); + } + } + else i=depth; + } while(j); + } + else { + node_first(); + int i=depth,j; + do{ + j=node_next(); + if(depthdepth){ + pg[i]->clean(); + delete pg[i]; + i--; + mark(pflag,++k,1000,"pages deleted"); + } + } + else i=depth; + } while(j); + } +} + +long Btree::list_write(ofstream &fout){ + int pflag=get_qflag(); + long ct=0; + node_first(); + while(node_next()){ + fout << show_str() << endl; + mark(pflag,++ct,1000,"strings written"); + } + fout.close(); + return((int)fout.good()); +} + +Btree::Btree(ifstream &fin){ + copy=false; + char cnam[256]; + int pflag=get_qflag(); + depth=0; + pg[0]=root=new Page(); + cnd[0]=root->ndnm = 1; + (root->pnd)[0]=new Node(""); + Node *pno; + long ct=0; + while(get_string(cnam,fin,'\n')){ + pno = new Node(cnam); + add(pno); + mark(pflag,++ct,10000,"strings read"); + } + fin.close(); +} + +int Btree::add(Node *nd){ + int w,k,dp; + Page *pm,*pz; + dp=depth; //uses dp in place of depth in insert. + while((nd!=NULL)&&(dp)){ + pm=pg[dp]; + w=pm->ndnm; + if(winsert(cnd[dp],nd,w); + nd=NULL; + (cnd[dp])++; //variation from insert. + } + else { + k=cnd[dp]; + if(kpnd)[order-1])->pdn,order); + pm->insert(k,nd,order); + nd=pm->pnd[order]; + nd->pdn=pz; + pm->ndnm=order; + } + else if(k>order){ + pz=new Page(pm,((pm->pnd)[order])->pdn,order+1); + pz->insert(k-order-1,nd,order-1); + nd=pm->pnd[order]; + nd->pdn=pz; + pm->ndnm=order; + } + else { + pz=new Page(pm,nd->pdn,order); + nd->pdn=pz; + pm->ndnm=order; + } + pg[dp]=pz; //2 lines of variation from insert. + cnd[dp]=order; + } + dp--; + } + if(nd!=NULL){ + pm=pg[dp]; + w=pm->ndnm; + if(winsert(cnd[dp],nd,w); + (cnd[dp])++; //variation from insert. + } + else { + root=new Page(); + root->pdn=pm; + k=cnd[dp]; + if(kpnd)[order-1])->pdn,order); + pm->insert(k,nd,order); + (root->pnd)[0]=pm->pnd[order]; + ((root->pnd)[0])->pdn=pz; + root->ndnm=1; + pm->ndnm=order; + } + else if(k>order){ + pz=new Page(pm,((pm->pnd)[order])->pdn,order+1); + pz->insert(k-order-1,nd,order-1); + (root->pnd)[0]=pm->pnd[order]; + ((root->pnd)[0])->pdn=pz; + root->ndnm=1; + pm->ndnm=order; + } + else { + pz=new Page(pm,nd->pdn,order); + (root->pnd)[0]=nd; + nd->pdn=pz; + root->ndnm=1; + pm->ndnm=order; + } + next_empty(); //variation from insert. + } + } +return(1); +} + +void Btree::next_empty(){ + depth=0; + pg[depth]=root; + int i=cnd[depth]=root->ndnm; + Page *pm; + while((pm=((pg[depth]->pnd)[i-1])->pdn)!=NULL){ + depth++; + pg[depth]=pm; + i=cnd[depth]=pm->ndnm; + } +} + +Str_str::Str_str() : Btree() { +} + +Str_str::~Str_str(){ + if(copy)return; + this->node_first(); + while(this->node_next())delete [] (char*)this->give_ptr(); +} + +void Str_str::add_pair(const char *one,const char *two){ + Node *pnd; + if(search(one)){ + cout << "Duplicate string in keys list = " << one << endl; + exit(0); + } + else { + int i=strlen(two); + char *st=new char[i+1]; + strcpy(st,two); + pnd=new Node(one,(void *)st); + add(pnd); + } +} + +char *Str_str::match(const char *one){ + if(search(one)){ + return((char*)give_ptr()); + } + else { + cout << "String not a key = " << one << endl; + exit(0); + } +} + +List::List() : Btree() { + cnt_key=0; +} + +List::~List(){ +} + +void List::add_key(const char *str){ + Node *pnd; + if(!search(str)){ + pnd=new Node(str); + add(pnd); + } +} + +void List::add_key_count(const char *str){ + Node *pnd; + if(!search(str)){ + pnd=new Node(str); + add(pnd); + cnt_key++; + } +} + +void List::addp_key_count(char *str){ + Node *pnd; + if(!search(str)){ + pnd=new Node; + pnd->str=str; + add(pnd); + cnt_key++; + } +} + +Num_num::Num_num() : Btree() { +} + +Num_num::~Num_num(){ + if(copy)return; + this->node_first(); + while(this->node_next())delete (long*)this->give_ptr(); +} + +void Num_num::add_pair(long i,long j){ + Node *pnd; + char cnam[256]; + long_str(cnam,i); + if(!search(cnam)){ + long *st=new long; + *st=j; + pnd=new Node(cnam,(void *)st); + add(pnd); + } +} + +long Num_num::match(long i){ + char cnam[256]; + long_str(cnam,i); + if(search(cnam)){ + return(*((long*)give_ptr())); + } + else return(LNEG); +} + +Count::Count() : List() { + total=0; +} + +Count::~Count(){ + if(copy)return; + long *pk; + this->node_first(); + while(this->node_next()){ + pk=(long*)(this->give_ptr()); + if(pk)delete pk; + } +} + +void Count::add_count(const char *pch,long n){ + long *ppt; + Node *np; + total+=n; + if(this->search(pch)==0){ + ppt = new long; + (*ppt) =n; + np=new Node(pch,(void*)ppt); + this->insert(np); + } + else { + (*(long*) this->give_ptr())+=n; + } +} + +void Count::add_countz(const char *pch,long n){ + long *ppt; + Node *np; + if(this->search(pch)==0){ + ppt = new long; + (*ppt) =n; + np=new Node(pch,(void*)ppt); + this->insert(np); + cnt_key++; + } + else { + (*(long*) this->give_ptr())+=n; + } +} + +void Count::add_count2(const char *pch,long n){ + long *ppt; + Node *np; + total+=n; + if(this->search(pch)==0){ + ppt = new long; + (*ppt) =n; + np=new Node(pch,(void*)ppt); + this->insert(np); + cnt_key++; + } + else { + (*(long*) this->give_ptr())+=n; + } +} + +void Count::addp_count2(char *pch,long n){ + long *ppt; + Node *np; + total+=n; + if(this->search(pch)==0){ + ppt = new long; + (*ppt) =n; + np=new Node; + np->str=pch; + np->rel=ppt; + this->insert(np); + cnt_key++; + } + else { + (*(long*) this->give_ptr())+=n; + } +} + +void Count::correct(const char *pch,long n){ + if(this->search(pch)){ + (*(long*) this->give_ptr())=n; + } +} + +long Count::count(const char *pch){ + if(this->search(pch)==0){ + return(0); + } + else { + return(*((long*) this->give_ptr())); + } +} + +long Count::count(void){ + return(*((long*) this->give_ptr())); +} + +void Count::max_count(const char *pch,long n){ + long *ppt,i; + Node *np; + total+=n; + if(!search(pch)){ + ppt = new long; + (*ppt) =n; + np=new Node(pch,(void*)ppt); + this->insert(np); + } + else { + ppt=(long*)give_ptr(); + if(*pptinsert(np); + cnt_key++; + } + else { + ppt=(long*)give_ptr(); + if(*pptstr=pch; + np->rel=ppt; + this->insert(np); + cnt_key++; + } + else { + ppt=(long*)give_ptr(); + if(*pptinsert(np); + } + else { + ppt=(long*)give_ptr(); + if(*ppt>n)*ppt=n; + } +} + +void Count::min_count2(const char *pch,long n){ + long *ppt,i; + Node *np; + total+=n; + if(!search(pch)){ + ppt = new long; + (*ppt) =n; + np=new Node(pch,(void*)ppt); + this->insert(np); + cnt_key++; + } + else { + ppt=(long*)give_ptr(); + if(*ppt>n)*ppt=n; + } +} + +void Count::minp_count2(char *pch,long n){ + long *ppt,i; + Node *np; + total+=n; + if(!search(pch)){ + ppt = new long; + (*ppt) =n; + np=new Node; + np->str=pch; + np->rel=ppt; + this->insert(np); + cnt_key++; + } + else { + ppt=(long*)give_ptr(); + if(*ppt>n)*ppt=n; + } +} + +//FCount (float count tree) + +FCount::FCount() : List() { + total=0; +} + +FCount::~FCount(){ + if(copy)return; + float *pk; + this->node_first(); + while(this->node_next()){ + pk=(float*)(this->give_ptr()); + if(pk)delete pk; + } +} + +void FCount::Copy(FCount &Fc){ + char *pch; + float *xx,*zz; + Node *pN; + + pg[0]=root; + cnd[0]=root->ndnm; + + Fc.node_first(); + while(Fc.node_next()){ + pch=Fc.show_str(); + xx=(float*)Fc.give_ptr(); + zz=new float; + *zz=*xx; + pN=new Node(pch,(void*)zz); + add(pN); + } +} + +void FCount::add_count(const char *pch,float z){ + float *ppt; + Node *np; + total+=z; + if(this->search(pch)==0){ + ppt = new float; + (*ppt) =z; + np=new Node(pch,(void*)ppt); + this->insert(np); + } + else { + (*(float*) this->give_ptr())+=z; + } +} + +void FCount::add_count2(const char *pch,float z){ + float *ppt; + Node *np; + total+=z; + if(this->search(pch)==0){ + ppt = new float; + (*ppt) =z; + np=new Node(pch,(void*)ppt); + this->insert(np); + cnt_key++; + } + else { + (*(float*) this->give_ptr())+=z; + } +} + +void FCount::addp_count2(char *pch,float z){ + float *ppt; + Node *np; + total+=z; + if(this->search(pch)==0){ + ppt = new float; + (*ppt) =z; + np=new Node; + np->str=pch; + np->rel=ppt; + this->insert(np); + cnt_key++; + } + else { + (*(float*) this->give_ptr())+=z; + } +} + +float FCount::count(const char *pch){ + if(this->search(pch)==0){ + return(0); + } + else { + return(*((float*) this->give_ptr())); + } +} + +float FCount::count(void){ + return(*((float*) this->give_ptr())); +} + +//DCount (double precision count tree) + +DCount::DCount() : List() { + total=0; +} + +DCount::~DCount(){ + if(copy)return; + double *pk; + this->node_first(); + while(this->node_next()){ + pk=(double*)(this->give_ptr()); + if(pk)delete pk; + } +} + +void DCount::Copy(DCount &Dc){ + char *pch; + double *xx,*zz; + Node *pN; + + pg[0]=root; + cnd[0]=root->ndnm; + + Dc.node_first(); + while(Dc.node_next()){ + pch=Dc.show_str(); + xx=(double*)Dc.give_ptr(); + zz=new double; + *zz=*xx; + pN=new Node(pch,(void*)zz); + add(pN); + } +} + +void DCount::add_count(const char *pch,double z){ + double *ppt; + Node *np; + total+=z; + if(this->search(pch)==0){ + ppt = new double; + (*ppt) =z; + np=new Node(pch,(void*)ppt); + this->insert(np); + } + else { + (*(double*) this->give_ptr())+=z; + } +} + +void DCount::add_count2(const char *pch,double z){ + double *ppt; + Node *np; + total+=z; + if(this->search(pch)==0){ + ppt = new double; + (*ppt) =z; + np=new Node(pch,(void*)ppt); + this->insert(np); + cnt_key++; + } + else { + (*(double*) this->give_ptr())+=z; + } +} + +void DCount::addp_count2(char *pch,double z){ + double *ppt; + Node *np; + total+=z; + if(this->search(pch)==0){ + ppt = new double; + (*ppt) =z; + np=new Node; + np->str=pch; + np->rel=ppt; + this->insert(np); + cnt_key++; + } + else { + (*(double*) this->give_ptr())+=z; + } +} + +double DCount::count(const char *pch){ + if(this->search(pch)==0){ + return(0); + } + else { + return(*((double*) this->give_ptr())); + } +} + +double DCount::count(void){ + return(*((double*) this->give_ptr())); +} + +void DCount::max_count(const char *pch,double z){ + double *ppt; + Node *np; + total+=z; + if(!search(pch)){ + ppt = new double; + (*ppt) =z; + np=new Node(pch,(void*)ppt); + this->insert(np); + } + else { + ppt=(double*)give_ptr(); + if(*pptinsert(np); + cnt_key++; + } + else { + ppt=(double*)give_ptr(); + if(*pptstr=pch; + np->rel=ppt; + this->insert(np); + cnt_key++; + } + else { + ppt=(double*)give_ptr(); + if(*pptinsert(np); + } + else { + ppt=(double*)give_ptr(); + if(*ppt>z)*ppt=z; + } +} + +void DCount::min_count2(const char *pch,double z){ + double *ppt; + Node *np; + total+=z; + if(!search(pch)){ + ppt = new double; + (*ppt) =z; + np=new Node(pch,(void*)ppt); + this->insert(np); + cnt_key++; + } + else { + ppt=(double*)give_ptr(); + if(*ppt>z)*ppt=z; + } +} + +void DCount::minp_count2(char *pch,double z){ + double *ppt; + Node *np; + total+=z; + if(!search(pch)){ + ppt = new double; + (*ppt) =z; + np=new Node; + np->str=pch; + np->rel=ppt; + this->insert(np); + cnt_key++; + } + else { + ppt=(double*)give_ptr(); + if(*ppt>z)*ppt=z; + } +} + +void DCount::debug(void){ + node_first(); + while(node_next()){ + cout << count() << " " << show_str() << endl; + } +} + +//Partial Match + +Partial_match::Partial_match() : Count() { +} + +Partial_match::~Partial_match(){ +} + +void Partial_match::long_match(char *str,List &Lst){ + char *pch; + while(*str!='\0'){ + if(this->search_long(str)){ + pch=this->show_str(); + Lst.add_key_count(pch); + } + if((pch=strchr(str,' '))!=NULL)str=pch+1; + else str=str+strlen(str); + } +} + +void Partial_match::local_match(char *str,List &Lst){ + char *pch; + int i,j; + if(*str!='\0'){ + if(this->search_long(str)){ + pch=this->show_str(); + Lst.add_key_count(pch); + i=strlen(pch)-1; + while(0search(str); + *(str+i)=' '; + if(j){ + pch=this->show_str(); + Lst.add_key_count(pch); + } + i--; + } + } + } + } +} + +void Partial_match::all_match(char *str,List &Lst){ + char *pch; + int i,j; + while(*str!='\0'){ + if(this->search_long(str)){ + pch=this->show_str(); + Lst.add_key_count(pch); + i=strlen(pch)-1; + while(0search(str); + *(str+i)=' '; + if(j){ + pch=this->show_str(); + Lst.add_key_count(pch); + } + i--; + } + } + } + if((pch=strchr(str,' '))!=NULL)str=pch+1; + else str=str+strlen(str); + } +} + +void Partial_match::long_match(char *str,Count &Cnt,long n){ + char *pch; + while(*str!='\0'){ + if(this->search_long(str)){ + pch=this->show_str(); + Cnt.add_count2(pch,n); + } + if((pch=strchr(str,' '))!=NULL)str=pch+1; + else str=str+strlen(str); + } +} + +void Partial_match::local_match(char *str,Count &Cnt,long n){ + char *pch; + int i,j; + if(*str!='\0'){ + if(this->search_long(str)){ + pch=this->show_str(); + Cnt.add_count2(pch,n); + i=strlen(pch)-1; + while(0search(str); + *(str+i)=' '; + if(j){ + pch=this->show_str(); + Cnt.add_count2(pch,n); + } + i--; + } + } + } + } +} + +void Partial_match::all_match(char *str,Count &Cnt,long n){ + char *pch; + int i,j; + while(*str!='\0'){ + if(this->search_long(str)){ + pch=this->show_str(); + Cnt.add_count2(pch,n); + i=strlen(pch)-1; + while(0search(str); + *(str+i)=' '; + if(j){ + pch=this->show_str(); + Cnt.add_count2(pch,n); + } + i--; + } + } + } + if((pch=strchr(str,' '))!=NULL)str=pch+1; + else str=str+strlen(str); + } +} + +int Partial_match::search_long(char *str){ + int a=0,b=0,i,j; + len=strlen(str); + if(this->step_one(a,b,str))return(1); + i=(asearch(str); + *(str+i)=' '; + if(j)return(1); + i--; + } + } + if(cln_o){ + depth=depth_o; + cnd[depth]=index_o; + return(1); + } + else return(0); +} + +int Partial_match::step_one(int &a,int &b,char *str){ + char c; + cln_o=0; + cln=0; + while((c=*(str+cln))&&c!=32)cln++; + *(str+cln)='\0'; + depth=-1; + Page *pu=root; + int i,j; + while(pu!=NULL){ + depth++; + pg[depth]=pu; + j=(pu->search)(a,b,str,i,this); + cnd[depth]=i; + if(j==1)return(1); + if(i==0)pu=pu->pdn; + else pu=(pu->pnd)[i-1]->pdn; + } + +if(cln -#include -using namespace std; -namespace iret { - -const int order = 5; //Half the order of the Btree that we build. -const int height_limit =12; //Limit on the height of the Btree. -const int ord2 = order*2; //The order of the Btree. - -int stc_my(int &,int &,const char *,const char *); //Function used to compare - //two strings. The first two arguments hold information about how much the - //string can be ignored in the comparison. - -class Page; //forward declaration -class Btree; //forward declaration -class Partial_match; //forward declaration - -class Node { - friend int stc_my(int &,int &,const char *,const char *); - friend class Page; - friend class Btree; - friend class List; - friend class Count; - friend class FCount; - friend class DCount; - template friend class BCount; - friend class Partial_match; - friend class Thes; -public: - Node(void); //Sets all points to NULL. - Node(const char * ); //Argument is the string for this node. - Node(const char * ,void *); //Arguments are first the string and then the - //data pointer. - ~Node(); - void debug(); //Prints out the node in simple format. -private: - char *str; //String pointer. - void *rel; //Data pointer. - Page *pdn; //Points down to the page below or to NULL. -}; - -class Page { - friend int stc_my(int &,int &,const char *,const char *); - friend class Btree; - friend class Partial_match; - friend class FCount; - friend class DCount; -public: - Page(); //Constructs a new empty page. Only happens at the root. - Page(Page * const pz,Page * const pn,const int n); //Constructs a page that - //holds the right half of a full page. The full page is pointed at by the - //pz. The new pages downward pointer is set to pn. - //n tells how much of the full page is to remain or where to begin removal. - ~Page(); - void clean(void); //Used to delete without touching search keys in the nodes - //which were created with addp functions and do not belong to the tree. - void insert(const int n,Node * const nd,const int j); //inserts in partially empty - //page. n is insertion point, j is number of nodes on page that are viable. - int search(int &a,int &b,const char *,int &p); //searches for string on - //the page. Returns 1 if found, 0 otherwise. If found p is the index, otherwise - //if p is 0 then the page downward pointer is to next page to search, but if - //p is positive then p-1 is number of node that has the downward pointer to - //next page to search. - int search(int &a,int &b,char *,int &p,Partial_match *btr); //Looks for longest - //partial match. - void debug(); //Prints out the page for debugging purposes. - -private: - char ndnm; //Indicates the number of Nodes on the page. - Page *pdn; //Pointer that points to the page below and also lexically below. - //May be NULL. - Node *pnd[ord2]; //Pointers to the nodes on the page. Some may be NULL. -}; - -class Btree { - friend class Page; -public: - Btree(void); - Btree(ifstream &); //Reads in a Btree in form of list written out by - //list_write() from disc. String arguments mark the path in proj file. - Btree( const Btree & btree ) {copy = true; root = btree.root;} // Actually - // creates another reference to the same tree. Take great care to - // avoid simultaneously modifying both copies. - ~Btree(void); - int search(const char *); //Searches for a string and sets the path to that - //string or its insertion point. - int insert(Node *);//Only to be called after a search has failed to find the - //string. - void node_first();//Finds the first node in the tree and sets the path to it. - int node_next(); //Given the path is already set to a node, this function - //finds the next node in lexicographic order. - char *show_str();//Used to show the string after a call to next is successful. - void *give_ptr();//Used to give the data pointer in the current node. - void set_ptr(void *); //Used to set the data pointer after a call to search - //has found string. - int add(Node *); //Only to be used to construct a tree from a lexical list - //as written out by list_write(); - void next_empty(); //Only used to reset the pointer arrays when the root is - //split. Used in add(). - long list_write(ofstream &); //Writes out a lexical list of the strings in - //the tree. - int iclean; //Default 0, but set to 1 if want to have destructor run without - //touching key strings (if addp used in making tree). -protected: - int depth; //Tells the depth in the tree that marks the current location. - Page *root; //Points at the root page of the tree. - Page *pg[height_limit]; //Descending list of pointers that mark the pages. - int cnd[height_limit]; //Mark the positions of the nodes just above the - //downard page pointer at each level. Thus 0 marks the page's downward - //pointer, but a nonzero value must have 1 subtracted and then it gives - //the node whose downward pointer is the correct downward pointer. - bool copy; //flags copies of a tree with true. -}; - -class List : public Btree { -public: - List(); - List(const List & list) : Btree(list) {} - ~List(); - void add_key(const char *str); //Adds the string *str to the tree if not already in list - void add_key_count(const char *str); //Adds the string *str to the tree if - //not already in list and counts it. - void addp_key_count(char *str); //Adds the string *str to the tree if - //not already in list and counts it. Uses the actual string pointer instead - //of making a copy - long cnt_key; //Used to count the number of keys. -}; - -class Count : public List { -public: - Count(); - Count(const Count & Ct) : List(Ct){} - ~Count(); - void add_count(const char *str,long n); //Adds the string *str with its count - //to the tree if not already in list. String is key and count is data. - //If string is already a key the count is incremented by n. - void add_countz(const char *str,long n); //Adds the string *str with its count - //just as add_count, but also counts number of unique keys in count. - //Does not add count to the total variable, unlike add_count2. - void add_count2(const char *str,long n); //Adds the string *str with its count - //just as add_count, but also counts number of unique keys in count. - void addp_count2(char *str,long n); //Adds the string *str with its count - //just as add_count, but also counts number of unique keys in count. - //Does not make copy of string, but uses the pointer str as key pointer. - void correct(const char *str,long n); //If str is in the tree the count is - //changed to n. Otherwise nothing is done. - - //Functions for maximum calculation - void max_count(const char *str,long n); //Adds the string *str with its count - //to the tree if not already in list. String is key and count is data. - //If string is already a key the count is max of n and prior value. - void max_count2(const char *str,long n); //Adds the string *str with its count - //just as max_count, but also counts number of unique keys in count. - void maxp_count2(char *str,long n); //Adds the string *str with its count - //just as max_count, but also counts number of unique keys in count. - //Does not make copy of string, but uses the pointer str as key pointer. - - //Functions for minium calculation - void min_count(const char *str,long n); //Adds the string *str with its count - //to the tree if not already in list. String is key and count is data. - //If string is already a key the count is min of n and prior value. - void min_count2(const char *str,long n); //Adds the string *str with its count - //just as min_count, but also counts number of unique keys in count. - void minp_count2(char *str,long n); //Adds the string *str with its count - //just as min_count, but also counts number of unique keys in count. - //Does not make copy of string, but uses the pointer str as key pointer. - - long count(const char *str); //Returns the count if a key (in list) otherwise - //returns 0. - long count(void); //Returns the count of the current string. Assumes the - //pointers have already been set by a search or node_next call. - long total; //Holds the total of all counts added for all keys. -}; - -class FCount : public List { -public: - FCount(); - FCount(const FCount & Ct) : List(Ct){} - ~FCount(); - void Copy(FCount &Dc); //Makes a copy of the tree Dc in the current tree. - void add_count(const char *str,float z); //Adds the string *str with its count - //to the tree if not already in list. String is key and count is data. - //If string is already a key the count is incremented by z. - void add_count2(const char *str,float z); //Adds the string *str with its count - //just as add_count, but also counts number of unique keys in count. - void addp_count2(char *str,float z); //Adds the string *str with its count - //just as add_count, but also counts number of unique keys in count. - //Does not make copy of string, but uses the pointer str as key pointer. - float count(const char *str); //Returns the count if a key (in list) otherwise - //returns 0. - float count(void); //Returns the count of the current string. Assumes the - //pointers have already been set by a search or node_next call. - float total; //Holds the total of all counts added for all keys. -}; - -class DCount : public List { -public: - DCount(); - DCount(const DCount & Ct) : List(Ct){} - ~DCount(); - void Copy(DCount &Dc); //Makes a copy of the tree Dc in the current tree. - void add_count(const char *str,double z); //Adds the string *str with its count - //to the tree if not already in list. String is key and count is data. - //If string is already a key the count is incremented by z. - void add_count2(const char *str,double z); //Adds the string *str with its count - //just as add_count, but also counts number of unique keys in count. - void addp_count2(char *str,double z); //Adds the string *str with its count - //just as add_count, but also counts number of unique keys in count. - //Does not make copy of string, but uses the pointer str as key pointer. - double count(const char *str); //Returns the count if a key (in list) otherwise - //returns 0. - double count(void); //Returns the count of the current string. Assumes the - //pointers have already been set by a search or node_next call. - - //Functions for maximum calculation - void max_count(const char *str,double z); //Adds the string *str with its count - //to the tree if not already in list. String is key and count is data. - //If string is already a key the count is max of z and prior value. - void max_count2(const char *str,double z); //Adds the string *str with its count - //just as max_count, but also counts number of unique keys in count. - void maxp_count2(char *str,double z); //Adds the string *str with its count - //just as max_count, but also counts number of unique keys in count. - //Does not make copy of string, but uses the pointer str as key pointer. - - //Functions for minium calculation - void min_count(const char *str,double z); //Adds the string *str with its count - //to the tree if not already in list. String is key and count is data. - //If string is already a key the count is min of z and prior value. - void min_count2(const char *str,double z); //Adds the string *str with its count - //just as min_count, but also counts number of unique keys in count. - void minp_count2(char *str,double z); //Adds the string *str with its count - //just as min_count, but also counts number of unique keys in count. - //Does not make copy of string, but uses the pointer str as key pointer. - - void debug(void); //Prints to stdout a list "i str[i]" - double total; //Holds the total of all counts added for all keys. -}; - -class Partial_match : public Count { - friend class Page; -public: - Partial_match(); - Partial_match(const Partial_match & Par_mat) : Count(Par_mat){} - ~Partial_match(); - void long_match(char *,List &); //Finds the longest matches for all word - //starts in the string and adds them to the list. - void local_match(char *,List &); //Finds all matches that start at - //beginning of the string and adds them to the list. - void all_match(char *,List &); //Finds all matches within the string and - //adds them to the list. - void long_match(char *,Count &,long n); //Finds the longest matches for all word - //starts in the string and adds them to the list in Count. - void local_match(char *,Count &,long n); //Finds all matches that start at - //beginning of string and adds them to the list in Count. - void all_match(char *,Count &,long n); //Finds all matches within the string and - //adds them to the list in Count. - int search_long(char *); //Searches for longest partial match to an initial - //segment of a string that ends at a word boundary and - //sets the path to that string or its insertion point. - -private: - int stc_my_long(int &,int &,char *,const char *,int); //Function used to compare - //two strings. The first two arguments hold information about how much the - //string can be ignored in the comparison. The last argument holds the index - //or number of the string's node on the page. - int step_one(int &,int &,char *); //Looks for partial or complete match and - //returns 1 if complete found. Partial is reflected in parameters. - - //Special parameters used in partial matching. - int depth_o; //Depth of longest partial match thus far. - int index_o; //index of longest partial match thus far. - int cln_o; //String length of longest partial match thus far. - int len; //Length of query string. - int cln; //Current null position in string. -}; - -class Str_str : public Btree { -public: - Str_str(); - Str_str(const Str_str & Stst) : Btree(Stst){} - ~Str_str(); - void add_pair(const char *one,const char *two); //Adds the string *one to the tree and stores - //the string *two at that node. - char *match(const char *one); //Returns pointer to the string stored under string *one. -}; - -class Num_num : public Btree { -public: - Num_num(); - Num_num(const Num_num & Nmnm) : Btree(Nmnm){} - ~Num_num(); - void add_pair(long i, long j); //Adds the string for i to the tree and - //stores the number j at that node. - long match(long i); //Returns the number stored under the string for i. -}; - -template -class BCount : public List { -public: - BCount(); - BCount(const BCount & Ct) : List(Ct){} - ~BCount(); - void add_count(const char *str,Z n); //Adds the string *str with its count - //to the tree if not already in list. String is key and count is data. - //If string is already a key the count is incremented by n. - void add_count2(const char *str,Z n); //Adds the string *str with its count - //just as add_count, but also counts number of unique keys in count. - void addp_count2(char *str,Z n); //Adds the string *str with its count - //just as add_count, but also counts number of unique keys in count. - //Does not make copy of string, but uses the pointer str as key pointer. - void correct(const char *str,Z n); //If str is in the tree the count is - //changed to n. Otherwise nothing is done. - - //Functions for maximum calculation - void max_count(const char *str,Z n); //Adds the string *str with its count - //to the tree if not already in list. String is key and count is data. - //If string is already a key the count is max of n and prior value. - void max_count2(const char *str,Z n); //Adds the string *str with its count - //just as max_count, but also counts number of unique keys in count. - void maxp_count2(char *str,Z n); //Adds the string *str with its count - //just as max_count, but also counts number of unique keys in count. - //Does not make copy of string, but uses the pointer str as key pointer. - - //Functions for minium calculation - void min_count(const char *str,Z n); //Adds the string *str with its count - //to the tree if not already in list. String is key and count is data. - //If string is already a key the count is min of n and prior value. - void min_count2(const char *str,Z n); //Adds the string *str with its count - //just as min_count, but also counts number of unique keys in count. - void minp_count2(char *str,Z n); //Adds the string *str with its count - //just as min_count, but also counts number of unique keys in count. - //Does not make copy of string, but uses the pointer str as key pointer. - - Z count(const char *str); //Returns the count if a key (in list) otherwise - //returns 0. - Z count(void); //Returns the count of the current string. Assumes the - //pointers have already been set by a search or node_next call. - Z total; //Holds the total of all counts added for all keys. -}; - -template -BCount::BCount() : List() { - total=0; -} - -template -BCount::~BCount(){ - if(copy)return; - Z *pk; - this->node_first(); - while(this->node_next()){ - pk=(Z *)(this->give_ptr()); - if(pk)delete pk; - } -} - -template -void BCount::add_count(const char *pch,Z n){ - Z *ppt; - Node *np; - total+=n; - if(this->search(pch)==0){ - ppt = new Z; - (*ppt) =n; - np=new Node(pch,(void*)ppt); - this->insert(np); - } - else { - (*(Z *) this->give_ptr())+=n; - } -} - -template -void BCount::add_count2(const char *pch,Z n){ - Z *ppt; - Node *np; - total+=n; - if(this->search(pch)==0){ - ppt = new Z; - (*ppt) =n; - np=new Node(pch,(void*)ppt); - this->insert(np); - cnt_key++; - } - else { - (*(Z *) this->give_ptr())+=n; - } -} - -template -void BCount::addp_count2(char *pch,Z n){ - Z *ppt; - Node *np; - total+=n; - if(this->search(pch)==0){ - ppt = new Z; - (*ppt) =n; - np=new Node; - np->str=pch; - np->rel=ppt; - this->insert(np); - cnt_key++; - } - else { - (*(Z *) this->give_ptr())+=n; - } -} - -template -void BCount::correct(const char *pch,Z n){ - if(this->search(pch)){ - (*(Z *) this->give_ptr())=n; - } -} - -template -Z BCount::count(const char *pch){ - if(this->search(pch)==0){ - return(0); - } - else { - return(*((Z *) this->give_ptr())); - } -} - -template -Z BCount::count(void){ - return(*((Z *) this->give_ptr())); -} - -template -void BCount::max_count(const char *pch,Z n){ - Z *ppt,i; - Node *np; - total+=n; - if(!search(pch)){ - ppt = new Z; - (*ppt) =n; - np=new Node(pch,(void*)ppt); - this->insert(np); - } - else { - ppt=(Z *)give_ptr(); - if(*ppt -void BCount::max_count2(const char *pch,Z n){ - Z *ppt,i; - Node *np; - total+=n; - if(!search(pch)){ - ppt = new Z; - (*ppt) =n; - np=new Node(pch,(void*)ppt); - this->insert(np); - cnt_key++; - } - else { - ppt=(Z *)give_ptr(); - if(*ppt -void BCount::maxp_count2(char *pch,Z n){ - Z *ppt,i; - Node *np; - total+=n; - if(!search(pch)){ - ppt = new Z; - (*ppt) =n; - np=new Node; - np->str=pch; - np->rel=ppt; - this->insert(np); - cnt_key++; - } - else { - ppt=(Z *)give_ptr(); - if(*ppt -void BCount::min_count(const char *pch,Z n){ - Z *ppt,i; - Node *np; - total+=n; - if(!search(pch)){ - ppt = new Z; - (*ppt) =n; - np=new Node(pch,(void*)ppt); - this->insert(np); - } - else { - ppt=(Z *)give_ptr(); - if(*ppt>n)*ppt=n; - } -} - -template -void BCount::min_count2(const char *pch,Z n){ - Z *ppt,i; - Node *np; - total+=n; - if(!search(pch)){ - ppt = new Z; - (*ppt) =n; - np=new Node(pch,(void*)ppt); - this->insert(np); - cnt_key++; - } - else { - ppt=(Z *)give_ptr(); - if(*ppt>n)*ppt=n; - } -} - -template -void BCount::minp_count2(char *pch,Z n){ - Z *ppt,i; - Node *np; - total+=n; - if(!search(pch)){ - ppt = new Z; - (*ppt) =n; - np=new Node; - np->str=pch; - np->rel=ppt; - this->insert(np); - cnt_key++; - } - else { - ppt=(Z *)give_ptr(); - if(*ppt>n)*ppt=n; - } -} - -} -#endif +#ifndef BTREE_H +#define BTREE_H + +#define LNEG -100000000 + +#include +#include +using namespace std; +namespace iret { + +const int order = 5; //Half the order of the Btree that we build. +const int height_limit =12; //Limit on the height of the Btree. +const int ord2 = order*2; //The order of the Btree. + +int stc_my(int &,int &,const char *,const char *); //Function used to compare + //two strings. The first two arguments hold information about how much the + //string can be ignored in the comparison. + +class Page; //forward declaration +class Btree; //forward declaration +class Partial_match; //forward declaration + +class Node { + friend int stc_my(int &,int &,const char *,const char *); + friend class Page; + friend class Btree; + friend class List; + friend class Count; + friend class FCount; + friend class DCount; + template friend class BCount; + friend class Partial_match; + friend class Thes; +public: + Node(void); //Sets all points to NULL. + Node(const char * ); //Argument is the string for this node. + Node(const char * ,void *); //Arguments are first the string and then the + //data pointer. + ~Node(); + void debug(); //Prints out the node in simple format. +private: + char *str; //String pointer. + void *rel; //Data pointer. + Page *pdn; //Points down to the page below or to NULL. +}; + +class Page { + friend int stc_my(int &,int &,const char *,const char *); + friend class Btree; + friend class Partial_match; + friend class FCount; + friend class DCount; +public: + Page(); //Constructs a new empty page. Only happens at the root. + Page(Page * const pz,Page * const pn,const int n); //Constructs a page that + //holds the right half of a full page. The full page is pointed at by the + //pz. The new pages downward pointer is set to pn. + //n tells how much of the full page is to remain or where to begin removal. + ~Page(); + void clean(void); //Used to delete without touching search keys in the nodes + //which were created with addp functions and do not belong to the tree. + void insert(const int n,Node * const nd,const int j); //inserts in partially empty + //page. n is insertion point, j is number of nodes on page that are viable. + int search(int &a,int &b,const char *,int &p); //searches for string on + //the page. Returns 1 if found, 0 otherwise. If found p is the index, otherwise + //if p is 0 then the page downward pointer is to next page to search, but if + //p is positive then p-1 is number of node that has the downward pointer to + //next page to search. + int search(int &a,int &b,char *,int &p,Partial_match *btr); //Looks for longest + //partial match. + void debug(); //Prints out the page for debugging purposes. + +private: + char ndnm; //Indicates the number of Nodes on the page. + Page *pdn; //Pointer that points to the page below and also lexically below. + //May be NULL. + Node *pnd[ord2]; //Pointers to the nodes on the page. Some may be NULL. +}; + +class Btree { + friend class Page; +public: + Btree(void); + Btree(ifstream &); //Reads in a Btree in form of list written out by + //list_write() from disc. String arguments mark the path in proj file. + Btree( const Btree & btree ) {copy = true; root = btree.root;} // Actually + // creates another reference to the same tree. Take great care to + // avoid simultaneously modifying both copies. + ~Btree(void); + int search(const char *); //Searches for a string and sets the path to that + //string or its insertion point. + int insert(Node *);//Only to be called after a search has failed to find the + //string. + void node_first();//Finds the first node in the tree and sets the path to it. + int node_next(); //Given the path is already set to a node, this function + //finds the next node in lexicographic order. + char *show_str();//Used to show the string after a call to next is successful. + void *give_ptr();//Used to give the data pointer in the current node. + void set_ptr(void *); //Used to set the data pointer after a call to search + //has found string. + int add(Node *); //Only to be used to construct a tree from a lexical list + //as written out by list_write(); + void next_empty(); //Only used to reset the pointer arrays when the root is + //split. Used in add(). + long list_write(ofstream &); //Writes out a lexical list of the strings in + //the tree. + int iclean; //Default 0, but set to 1 if want to have destructor run without + //touching key strings (if addp used in making tree). +protected: + int depth; //Tells the depth in the tree that marks the current location. + Page *root; //Points at the root page of the tree. + Page *pg[height_limit]; //Descending list of pointers that mark the pages. + int cnd[height_limit]; //Mark the positions of the nodes just above the + //downard page pointer at each level. Thus 0 marks the page's downward + //pointer, but a nonzero value must have 1 subtracted and then it gives + //the node whose downward pointer is the correct downward pointer. + bool copy; //flags copies of a tree with true. +}; + +class List : public Btree { +public: + List(); + List(const List & list) : Btree(list) {} + ~List(); + void add_key(const char *str); //Adds the string *str to the tree if not already in list + void add_key_count(const char *str); //Adds the string *str to the tree if + //not already in list and counts it. + void addp_key_count(char *str); //Adds the string *str to the tree if + //not already in list and counts it. Uses the actual string pointer instead + //of making a copy + long cnt_key; //Used to count the number of keys. +}; + +class Count : public List { +public: + Count(); + Count(const Count & Ct) : List(Ct){} + ~Count(); + void add_count(const char *str,long n); //Adds the string *str with its count + //to the tree if not already in list. String is key and count is data. + //If string is already a key the count is incremented by n. + void add_countz(const char *str,long n); //Adds the string *str with its count + //just as add_count, but also counts number of unique keys in count. + //Does not add count to the total variable, unlike add_count2. + void add_count2(const char *str,long n); //Adds the string *str with its count + //just as add_count, but also counts number of unique keys in count. + void addp_count2(char *str,long n); //Adds the string *str with its count + //just as add_count, but also counts number of unique keys in count. + //Does not make copy of string, but uses the pointer str as key pointer. + void correct(const char *str,long n); //If str is in the tree the count is + //changed to n. Otherwise nothing is done. + + //Functions for maximum calculation + void max_count(const char *str,long n); //Adds the string *str with its count + //to the tree if not already in list. String is key and count is data. + //If string is already a key the count is max of n and prior value. + void max_count2(const char *str,long n); //Adds the string *str with its count + //just as max_count, but also counts number of unique keys in count. + void maxp_count2(char *str,long n); //Adds the string *str with its count + //just as max_count, but also counts number of unique keys in count. + //Does not make copy of string, but uses the pointer str as key pointer. + + //Functions for minium calculation + void min_count(const char *str,long n); //Adds the string *str with its count + //to the tree if not already in list. String is key and count is data. + //If string is already a key the count is min of n and prior value. + void min_count2(const char *str,long n); //Adds the string *str with its count + //just as min_count, but also counts number of unique keys in count. + void minp_count2(char *str,long n); //Adds the string *str with its count + //just as min_count, but also counts number of unique keys in count. + //Does not make copy of string, but uses the pointer str as key pointer. + + long count(const char *str); //Returns the count if a key (in list) otherwise + //returns 0. + long count(void); //Returns the count of the current string. Assumes the + //pointers have already been set by a search or node_next call. + long total; //Holds the total of all counts added for all keys. +}; + +class FCount : public List { +public: + FCount(); + FCount(const FCount & Ct) : List(Ct){} + ~FCount(); + void Copy(FCount &Dc); //Makes a copy of the tree Dc in the current tree. + void add_count(const char *str,float z); //Adds the string *str with its count + //to the tree if not already in list. String is key and count is data. + //If string is already a key the count is incremented by z. + void add_count2(const char *str,float z); //Adds the string *str with its count + //just as add_count, but also counts number of unique keys in count. + void addp_count2(char *str,float z); //Adds the string *str with its count + //just as add_count, but also counts number of unique keys in count. + //Does not make copy of string, but uses the pointer str as key pointer. + float count(const char *str); //Returns the count if a key (in list) otherwise + //returns 0. + float count(void); //Returns the count of the current string. Assumes the + //pointers have already been set by a search or node_next call. + float total; //Holds the total of all counts added for all keys. +}; + +class DCount : public List { +public: + DCount(); + DCount(const DCount & Ct) : List(Ct){} + ~DCount(); + void Copy(DCount &Dc); //Makes a copy of the tree Dc in the current tree. + void add_count(const char *str,double z); //Adds the string *str with its count + //to the tree if not already in list. String is key and count is data. + //If string is already a key the count is incremented by z. + void add_count2(const char *str,double z); //Adds the string *str with its count + //just as add_count, but also counts number of unique keys in count. + void addp_count2(char *str,double z); //Adds the string *str with its count + //just as add_count, but also counts number of unique keys in count. + //Does not make copy of string, but uses the pointer str as key pointer. + double count(const char *str); //Returns the count if a key (in list) otherwise + //returns 0. + double count(void); //Returns the count of the current string. Assumes the + //pointers have already been set by a search or node_next call. + + //Functions for maximum calculation + void max_count(const char *str,double z); //Adds the string *str with its count + //to the tree if not already in list. String is key and count is data. + //If string is already a key the count is max of z and prior value. + void max_count2(const char *str,double z); //Adds the string *str with its count + //just as max_count, but also counts number of unique keys in count. + void maxp_count2(char *str,double z); //Adds the string *str with its count + //just as max_count, but also counts number of unique keys in count. + //Does not make copy of string, but uses the pointer str as key pointer. + + //Functions for minium calculation + void min_count(const char *str,double z); //Adds the string *str with its count + //to the tree if not already in list. String is key and count is data. + //If string is already a key the count is min of z and prior value. + void min_count2(const char *str,double z); //Adds the string *str with its count + //just as min_count, but also counts number of unique keys in count. + void minp_count2(char *str,double z); //Adds the string *str with its count + //just as min_count, but also counts number of unique keys in count. + //Does not make copy of string, but uses the pointer str as key pointer. + + void debug(void); //Prints to stdout a list "i str[i]" + double total; //Holds the total of all counts added for all keys. +}; + +class Partial_match : public Count { + friend class Page; +public: + Partial_match(); + Partial_match(const Partial_match & Par_mat) : Count(Par_mat){} + ~Partial_match(); + void long_match(char *,List &); //Finds the longest matches for all word + //starts in the string and adds them to the list. + void local_match(char *,List &); //Finds all matches that start at + //beginning of the string and adds them to the list. + void all_match(char *,List &); //Finds all matches within the string and + //adds them to the list. + void long_match(char *,Count &,long n); //Finds the longest matches for all word + //starts in the string and adds them to the list in Count. + void local_match(char *,Count &,long n); //Finds all matches that start at + //beginning of string and adds them to the list in Count. + void all_match(char *,Count &,long n); //Finds all matches within the string and + //adds them to the list in Count. + int search_long(char *); //Searches for longest partial match to an initial + //segment of a string that ends at a word boundary and + //sets the path to that string or its insertion point. + +private: + int stc_my_long(int &,int &,char *,const char *,int); //Function used to compare + //two strings. The first two arguments hold information about how much the + //string can be ignored in the comparison. The last argument holds the index + //or number of the string's node on the page. + int step_one(int &,int &,char *); //Looks for partial or complete match and + //returns 1 if complete found. Partial is reflected in parameters. + + //Special parameters used in partial matching. + int depth_o; //Depth of longest partial match thus far. + int index_o; //index of longest partial match thus far. + int cln_o; //String length of longest partial match thus far. + int len; //Length of query string. + int cln; //Current null position in string. +}; + +class Str_str : public Btree { +public: + Str_str(); + Str_str(const Str_str & Stst) : Btree(Stst){} + ~Str_str(); + void add_pair(const char *one,const char *two); //Adds the string *one to the tree and stores + //the string *two at that node. + char *match(const char *one); //Returns pointer to the string stored under string *one. +}; + +class Num_num : public Btree { +public: + Num_num(); + Num_num(const Num_num & Nmnm) : Btree(Nmnm){} + ~Num_num(); + void add_pair(long i, long j); //Adds the string for i to the tree and + //stores the number j at that node. + long match(long i); //Returns the number stored under the string for i. +}; + +template +class BCount : public List { +public: + BCount(); + BCount(const BCount & Ct) : List(Ct){} + ~BCount(); + void add_count(const char *str,Z n); //Adds the string *str with its count + //to the tree if not already in list. String is key and count is data. + //If string is already a key the count is incremented by n. + void add_count2(const char *str,Z n); //Adds the string *str with its count + //just as add_count, but also counts number of unique keys in count. + void addp_count2(char *str,Z n); //Adds the string *str with its count + //just as add_count, but also counts number of unique keys in count. + //Does not make copy of string, but uses the pointer str as key pointer. + void correct(const char *str,Z n); //If str is in the tree the count is + //changed to n. Otherwise nothing is done. + + //Functions for maximum calculation + void max_count(const char *str,Z n); //Adds the string *str with its count + //to the tree if not already in list. String is key and count is data. + //If string is already a key the count is max of n and prior value. + void max_count2(const char *str,Z n); //Adds the string *str with its count + //just as max_count, but also counts number of unique keys in count. + void maxp_count2(char *str,Z n); //Adds the string *str with its count + //just as max_count, but also counts number of unique keys in count. + //Does not make copy of string, but uses the pointer str as key pointer. + + //Functions for minium calculation + void min_count(const char *str,Z n); //Adds the string *str with its count + //to the tree if not already in list. String is key and count is data. + //If string is already a key the count is min of n and prior value. + void min_count2(const char *str,Z n); //Adds the string *str with its count + //just as min_count, but also counts number of unique keys in count. + void minp_count2(char *str,Z n); //Adds the string *str with its count + //just as min_count, but also counts number of unique keys in count. + //Does not make copy of string, but uses the pointer str as key pointer. + + Z count(const char *str); //Returns the count if a key (in list) otherwise + //returns 0. + Z count(void); //Returns the count of the current string. Assumes the + //pointers have already been set by a search or node_next call. + Z total; //Holds the total of all counts added for all keys. +}; + +template +BCount::BCount() : List() { + total=0; +} + +template +BCount::~BCount(){ + if(copy)return; + Z *pk; + this->node_first(); + while(this->node_next()){ + pk=(Z *)(this->give_ptr()); + if(pk)delete pk; + } +} + +template +void BCount::add_count(const char *pch,Z n){ + Z *ppt; + Node *np; + total+=n; + if(this->search(pch)==0){ + ppt = new Z; + (*ppt) =n; + np=new Node(pch,(void*)ppt); + this->insert(np); + } + else { + (*(Z *) this->give_ptr())+=n; + } +} + +template +void BCount::add_count2(const char *pch,Z n){ + Z *ppt; + Node *np; + total+=n; + if(this->search(pch)==0){ + ppt = new Z; + (*ppt) =n; + np=new Node(pch,(void*)ppt); + this->insert(np); + cnt_key++; + } + else { + (*(Z *) this->give_ptr())+=n; + } +} + +template +void BCount::addp_count2(char *pch,Z n){ + Z *ppt; + Node *np; + total+=n; + if(this->search(pch)==0){ + ppt = new Z; + (*ppt) =n; + np=new Node; + np->str=pch; + np->rel=ppt; + this->insert(np); + cnt_key++; + } + else { + (*(Z *) this->give_ptr())+=n; + } +} + +template +void BCount::correct(const char *pch,Z n){ + if(this->search(pch)){ + (*(Z *) this->give_ptr())=n; + } +} + +template +Z BCount::count(const char *pch){ + if(this->search(pch)==0){ + return(0); + } + else { + return(*((Z *) this->give_ptr())); + } +} + +template +Z BCount::count(void){ + return(*((Z *) this->give_ptr())); +} + +template +void BCount::max_count(const char *pch,Z n){ + Z *ppt,i; + Node *np; + total+=n; + if(!search(pch)){ + ppt = new Z; + (*ppt) =n; + np=new Node(pch,(void*)ppt); + this->insert(np); + } + else { + ppt=(Z *)give_ptr(); + if(*ppt +void BCount::max_count2(const char *pch,Z n){ + Z *ppt,i; + Node *np; + total+=n; + if(!search(pch)){ + ppt = new Z; + (*ppt) =n; + np=new Node(pch,(void*)ppt); + this->insert(np); + cnt_key++; + } + else { + ppt=(Z *)give_ptr(); + if(*ppt +void BCount::maxp_count2(char *pch,Z n){ + Z *ppt,i; + Node *np; + total+=n; + if(!search(pch)){ + ppt = new Z; + (*ppt) =n; + np=new Node; + np->str=pch; + np->rel=ppt; + this->insert(np); + cnt_key++; + } + else { + ppt=(Z *)give_ptr(); + if(*ppt +void BCount::min_count(const char *pch,Z n){ + Z *ppt,i; + Node *np; + total+=n; + if(!search(pch)){ + ppt = new Z; + (*ppt) =n; + np=new Node(pch,(void*)ppt); + this->insert(np); + } + else { + ppt=(Z *)give_ptr(); + if(*ppt>n)*ppt=n; + } +} + +template +void BCount::min_count2(const char *pch,Z n){ + Z *ppt,i; + Node *np; + total+=n; + if(!search(pch)){ + ppt = new Z; + (*ppt) =n; + np=new Node(pch,(void*)ppt); + this->insert(np); + cnt_key++; + } + else { + ppt=(Z *)give_ptr(); + if(*ppt>n)*ppt=n; + } +} + +template +void BCount::minp_count2(char *pch,Z n){ + Z *ppt,i; + Node *np; + total+=n; + if(!search(pch)){ + ppt = new Z; + (*ppt) =n; + np=new Node; + np->str=pch; + np->rel=ppt; + this->insert(np); + cnt_key++; + } + else { + ppt=(Z *)give_ptr(); + if(*ppt>n)*ppt=n; + } +} + +} +#endif diff --git a/Library/FBase.C b/Library/FBase.C index 1321d65a334ddd5a0bb2fc767e8a31513457a073..6d109a44d48ced91c94e2ef9acb7f4f58a6032e5 100644 --- a/Library/FBase.C +++ b/Library/FBase.C @@ -1,600 +1,600 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "runn.h" -#include "FBase.h" - -using namespace std; -namespace iret { - -FBase::FBase(const char *typ,const char *nam){ - int lxn=strlen(typ); - type=new char[lxn+1]; - tpnm=-1; - nmnm=-1; - strcpy(type,typ); - lxn=strlen(nam); - name=new char[lxn+1]; - strcpy(name,nam); - cflag=0; - oflag=0; - pflag=get_qflag(); - eflag=1; -} - -FBase::FBase(const char *typ,int tpn,const char *nam){ - int lxn=strlen(typ); - type=new char[lxn+1]; - tpnm=tpn; - nmnm=-1; - strcpy(type,typ); - lxn=strlen(nam); - name=new char[lxn+1]; - strcpy(name,nam); - cflag=0; - oflag=0; - pflag=get_qflag(); - eflag=1; -} - -FBase::FBase(const char *typ,const char *nam,const char *pt){ - int lxn=strlen(typ); - type=new char[lxn+1]; - tpnm=-1; - nmnm=-1; - strcpy(type,typ); - lxn=strlen(nam); - name=new char[lxn+1]; - strcpy(name,nam); - cflag=0; - oflag=0; - pflag=get_qflag(); - if(*pt!=':')set_path_name(pt); - else set_path_internal(pt+1); -} - -FBase::~FBase(void){ - delete [] type; - delete [] name; -} - -void FBase::set_type_num(int tn){tpnm=tn;} - -void FBase::set_name_num(int nn){nmnm=nn;} - -void FBase::change_type(const char *typ){ - if(type!=NULL)delete [] type; - int lxn=strlen(typ); - type=new char[lxn+1]; - strcpy(type,typ); -} - -void FBase::change_name(const char *nam){ - if(name!=NULL)delete [] name; - int lxn=strlen(nam); - name=new char[lxn+1]; - strcpy(name,nam); -} - -void FBase::set_name(const char *nam){ - if(name!=NULL)delete [] name; - int lxn=strlen(nam); - name=new char[lxn+1]; - strcpy(name,nam); -} - -void FBase::subname(const char *tph,const char *tpl,const char *nm){ - char cnam[max_str]; - long i=strlen(tpl); - strcpy(cnam,tpl); - cnam[i]='_'; - cnam[i+1]='\0'; - strcat(cnam,nm); - change_type(tph); - change_name(cnam); -} - -void FBase::set_path_internal(const char *pt){ - long len; - if(pt&&(len=strlen(pt))){ - eflag=0; - path=new char[len+1]; - strcpy(path,pt); - } - else eflag=1; -} - -void FBase::set_path_name(const char *pa){ - long len; - if(pa&&(len=strlen(pa))){ - eflag=2; - pnam=new char[len+1]; - strcpy(pnam,pa); - } - else eflag=1; -} - -void FBase::map_down(FBase *pFb){ - pFb->change_type(type); - pFb->change_name(name); - pFb->set_type_num(tpnm); - pFb->set_name_num(nmnm); - pFb->pflag=pflag; - if(eflag==2)pFb->set_path_name(pnam); - else if(!eflag)pFb->set_path_internal(path); -} - -void FBase::map_down_sub(FBase *pFb,const char *subtype){ - pFb->subname(type,name,subtype); - pFb->set_type_num(tpnm); - pFb->set_name_num(nmnm); - pFb->pflag=pflag; - if(eflag==2)pFb->set_path_name(pnam); - else if(!eflag)pFb->set_path_internal(path); -} - -void FBase::get_pathx(char *nam,const char *ch){ - char cnam[256]; - ifstream fin; - - if(eflag==2){ - strcpy(cnam,"path_"); - strcat(cnam,pnam); - fin.open(cnam,ios::in); - if(!fin.is_open()){ - fin.clear(); - strcpy(cnam,"path"); - fin.open(cnam,ios::in); - if(!fin.is_open()){ - cout << "Path file for type " << type << " does not exist!" << endl; - exit(0); - } - } - fin.getline(nam,256); - fin.close(); - } - else if(eflag){ - strcpy(cnam,"path_"); - strcat(cnam,type); - strcat(cnam,"_"); - strcat(cnam,name); - strcat(cnam,"."); - strcat(cnam,ch); - fin.open(cnam,ios::in); - if(!fin.is_open()){ - fin.clear(); - strcpy(cnam,"path_"); - strcat(cnam,type); - strcat(cnam,"_"); - strcat(cnam,name); - fin.open(cnam,ios::in); - if(!fin.is_open()){ - fin.clear(); - strcpy(cnam,"path_"); - strcat(cnam,type); - fin.open(cnam,ios::in); - if(!fin.is_open()){ - fin.clear(); - strcpy(cnam,"path"); - fin.open(cnam,ios::in); - if(!fin.is_open()){ - cout << "Path file for type " << type << " does not exist!" << endl; - exit(0); - } - } - } - } - fin.getline(nam,256); - fin.close(); - } - else { - strcpy(nam,path); - } - - if(tpnm<0)strcat(nam,type); - else cat_num(type,tpnm,nam); - strcat(nam,"_"); - if(nmnm<0)strcat(nam,name); - else cat_num(name,nmnm,nam); - strcat(nam,"."); - strcat(nam,ch); -} - -void FBase::get_pathx(char *nam,long n,const char *ch){ - char cnam[256],bnam[256]; - ifstream fin; - - if(eflag==2){ - strcpy(cnam,"path_"); - strcat(cnam,pnam); - fin.open(cnam,ios::in); - if(!fin.is_open()){ - fin.clear(); - strcpy(cnam,"path"); - fin.open(cnam,ios::in); - if(!fin.is_open()){ - cout << "Path file for type " << type << " does not exist!" << endl; - exit(0); - } - } - fin.getline(nam,256); - fin.close(); - } - else if(eflag){ - strcpy(cnam,"path_"); - strcat(cnam,type); - strcat(cnam,"_"); - strcat(cnam,name); - strcat(cnam,"."); - strcat(cnam,ch); - fin.open(cnam,ios::in); - if(!fin.is_open()){ - fin.clear(); - strcpy(cnam,"path_"); - strcat(cnam,type); - strcat(cnam,"_"); - strcat(cnam,name); - fin.open(cnam,ios::in); - if(!fin.is_open()){ - fin.clear(); - strcpy(cnam,"path_"); - strcat(cnam,type); - fin.open(cnam,ios::in); - if(!fin.is_open()){ - fin.clear(); - strcpy(cnam,"path"); - fin.open(cnam,ios::in); - if(!fin.is_open()){ - cout << "Path file for type " << type << " does not exist!" << endl; - exit(0); - } - } - } - } - fin.getline(nam,256); - fin.close(); - } - else { - strcpy(nam,path); - } - - if(tpnm<0)strcat(nam,type); - else cat_num(type,tpnm,nam); - strcat(nam,"_"); - strcat(nam,add_num(name,n,bnam)); - strcat(nam,"."); - strcat(nam,ch); -} - -char *FBase::add_num(const char *ptr,long n,char *buf){ - char cnam[100]; - long_str(cnam,n); - strcpy(buf,ptr); - strcat(buf,cnam); - return(buf); -} - -char *FBase::cat_num(const char *ptr,long n,char *buf){ - char cnam[100]; - long_str(cnam,n); - strcat(buf,ptr); - strcat(buf,cnam); - return(buf); -} - -int FBase::Gcom(int sflag){ - if((cflag&sflag)&&!(oflag&sflag)){ - oflag=oflag|sflag; - return(1); - } - else return(0); -} - -int FBase::Rcom(int sflag){ - if((cflag&sflag)&&(oflag&sflag)){ - oflag=oflag&(~sflag); - return(1); - } - else return(0); -} - -ifstream *FBase::get_Istr(const char *a,ios::openmode mode){ - char cnam[max_str]; - get_pathx(cnam,a); - ifstream *pfin=new ifstream(cnam,mode); - if(pfin->is_open())return(pfin); - else { - cout << "Error: " << cnam << " failed to open!" << endl; - exit(0); - } -} - -ofstream *FBase::get_Ostr(const char *a,ios::openmode mode){ - char cnam[max_str]; - get_pathx(cnam,a); - ofstream *pfout=new ofstream(cnam,mode); - if(pfout->is_open())return(pfout); - else { - cout << "Error: " << cnam << " failed to open!" << endl; - exit(0); - } -} - -fstream *FBase::get_Fstr(const char *a,ios::openmode mode){ - char cnam[max_str]; - get_pathx(cnam,a); - fstream *pfstr=new fstream(cnam,mode); - if(pfstr->is_open())return(pfstr); - else { - cout << "Error: " << cnam << " failed to open!" << endl; - exit(0); - } -} - -ifstream *FBase::get_Istr(long n,const char *a,ios::openmode mode){ - char cnam[max_str]; - get_pathx(cnam,n,a); - ifstream *pfin=new ifstream(cnam,mode); - if(pfin->is_open())return(pfin); - else { - cout << "Error: " << cnam << " failed to open!" << endl; - exit(0); - } -} - -ofstream *FBase::get_Ostr(long n,const char *a,ios::openmode mode){ - char cnam[max_str]; - get_pathx(cnam,n,a); - ofstream *pfout=new ofstream(cnam,mode); - if(pfout->is_open())return(pfout); - else { - cout << "Error: " << cnam << " failed to open!" << endl; - exit(0); - } -} - -fstream *FBase::get_Fstr(long n,const char *a,ios::openmode mode){ - char cnam[max_str]; - get_pathx(cnam,n,a); - fstream *pfstr=new fstream(cnam,mode); - if(pfstr->is_open())return(pfstr); - else { - cout << "Error: " << cnam << " failed to open!" << endl; - exit(0); - } -} - -void FBase::dst_Istr(ifstream *pfin){ - if(!pfin)return; - if(!pfin->is_open()){ - cout << "File not open!" << endl; - exit(0); - } - delete pfin; -} - -void FBase::dst_Ostr(ofstream *pfout){ - if(!pfout)return; - if(!pfout->is_open()){ - cout << "File not open!" << endl; - exit(0); - } - delete pfout; -} - -void FBase::dst_Fstr(fstream *pfstr){ - if(!pfstr)return; - if(!pfstr->is_open()){ - cout << "File not open!" << endl; - exit(0); - } - delete pfstr; -} - -long FBase::get_Fsiz(const char *a){ - if(!Exists(a))return(0); - int fld; - struct stat datf; - char cnam[max_str]; - get_pathx(cnam,a); - fld=::open(cnam,O_RDONLY); - if(fld<=0){cout << cnam << " failed to open" << endl;exit(0);} - if(fstat(fld,&datf)){cout << cnam << " failed on size \ - determination" << endl;exit(0);} - ::close(fld); - return(datf.st_size); -} - -long FBase::get_Fsiz(long n,const char *a){ - if(!Exists(n,a))return(0); - int fld; - struct stat datf; - char cnam[max_str]; - get_pathx(cnam,n,a); - fld=::open(cnam,O_RDONLY); - if(fld<=0){cout << cnam << " failed to open" << endl;exit(0);} - if(fstat(fld,&datf)){cout << cnam << " failed on size \ - determination" << endl;exit(0);} - ::close(fld); - return(datf.st_size); -} - -char *FBase::get_Read(const char *a){ - int fld; - struct stat datf; - char cnam[max_str]; - get_pathx(cnam,a); - fld=::open(cnam,O_RDONLY); - if(fld<=0){cout << cnam << " failed to open" << endl;exit(0);} - if(fstat(fld,&datf)){cout << cnam << " failed on size \ - determination" << endl;exit(0);} - ::close(fld); - char *ptr=new char[datf.st_size]; - ifstream fin(cnam,ios::in); - if(!fin.is_open()){ - cout << "Error: " << cnam << " failed to open!" << endl; - exit(0); - } - fin.read(ptr,datf.st_size); - return(ptr); -} - -char *FBase::get_Read(long n,const char *a){ - int fld; - struct stat datf; - char cnam[max_str]; - get_pathx(cnam,n,a); - fld=::open(cnam,O_RDONLY); - if(fld<=0){cout << cnam << " failed to open" << endl;exit(0);} - if(fstat(fld,&datf)){cout << cnam << " failed on size \ - determination" << endl;exit(0);} - ::close(fld); - char *ptr=new char[datf.st_size]; - ifstream fin(cnam,ios::in); - if(!fin.is_open()){ - cout << "Error: " << cnam << " failed to open!" << endl; - exit(0); - } - fin.read(ptr,datf.st_size); - return(ptr); -} - -char *FBase::get_Mmap(const char *a){ - int fld; - struct stat datf; - char cnam[max_str]; - get_pathx(cnam,a); - fld=::open(cnam,O_RDONLY); - if(fld<=0){cout << cnam << " failed to open" << endl;exit(0);} - if(fstat(fld,&datf)){cout << cnam << " failed on size determination" << endl;exit(0);} - char *ptr=(char*)mmap(0,datf.st_size,PROT_READ,MAP_PRIVATE|MAP_NORESERVE,fld,0); - if(ptr==MAP_FAILED){cout << cnam << " failed to map" << endl;exit(0);} - ::close(fld); - return(ptr); -} - -char *FBase::get_Mmap(long n,const char *a){ - int fld; - struct stat datf; - char cnam[max_str]; - get_pathx(cnam,n,a); - fld=::open(cnam,O_RDONLY); - if(fld<=0){cout << cnam << " failed to open" << endl;exit(0);} - if(fstat(fld,&datf)){cout << cnam << " failed on size determination" << endl;exit(0);} - char *ptr=(char*)mmap(0,datf.st_size,PROT_READ,MAP_PRIVATE|MAP_NORESERVE,fld,0); - if(ptr==MAP_FAILED){cout << cnam << " failed to map" << endl;exit(0);} - ::close(fld); - return(ptr); -} - -char *FBase::get_Wmap(const char *a){ - int fld; - struct stat datf; - char cnam[max_str]; - get_pathx(cnam,a); - fld=::open(cnam,O_RDWR); - if(fld<=0){cout << cnam << " failed to open" << endl;exit(0);} - if(fstat(fld,&datf)){cout << cnam << " failed on size determination" << endl;exit(0);} - char *ptr=(char*)mmap(0,datf.st_size,PROT_READ|PROT_WRITE,MAP_SHARED,fld,0); - if(ptr==MAP_FAILED){cout << cnam << " failed to map" << endl;exit(0);} - ::close(fld); - return(ptr); -} - -char *FBase::get_Wmap(long n,const char *a){ - int fld; - struct stat datf; - char cnam[max_str]; - get_pathx(cnam,n,a); - fld=::open(cnam,O_RDWR); - if(fld<=0){cout << cnam << " failed to open" << endl;exit(0);} - if(fstat(fld,&datf)){cout << cnam << " failed on size determination" << endl;exit(0);} - char *ptr=(char*)mmap(0,datf.st_size,PROT_READ|PROT_WRITE,MAP_SHARED,fld,0); - if(ptr==MAP_FAILED){cout << cnam << " failed to map" << endl;exit(0);} - ::close(fld); - return(ptr); -} - -void FBase::dst_Mmap(const char *a,char *ptr){ - struct stat datf; - char cnam[max_str]; - if(ptr==NULL){cout << "NULL pointer" << endl;return;} - get_pathx(cnam,a); - if(stat(cnam,&datf)){cout << cnam << " failed on size determination" << endl;exit(0);} - if(munmap(ptr,datf.st_size)){cout << cnam << " failed to unmap" << endl;exit(0);} - ptr=NULL; -} - -void FBase::dst_Mmap(long n,const char *a,char *ptr){ - struct stat datf; - char cnam[max_str]; - if(ptr==NULL){cout << "NULL pointer" << endl;return;} - get_pathx(cnam,n,a); - if(stat(cnam,&datf)){cout << cnam << " failed on size determination" << endl;exit(0);} - if(munmap(ptr,datf.st_size)){cout << cnam << " failed to unmap" << endl;exit(0);} - ptr=NULL; -} - -void FBase::bin_Writ(const char *a,long nm,char *ptr){ - ofstream *pfout=get_Ostr(a,ios::out); - long k=100000,i=0; - while(i+kwrite((char*)ptr,k); - i+=k; - ptr=ptr+k; - } - pfout->write((char*)ptr,nm-i); - pfout->close(); - delete pfout; -} - -void FBase::bin_Writ(long n,const char *a,long nm,char *ptr){ - ofstream *pfout=get_Ostr(n,a,ios::out); - long k=100000,i=0; - while(i+kwrite((char*)ptr,k); - i+=k; - ptr=ptr+k; - } - pfout->write((char*)ptr,nm-i); - pfout->close(); - delete pfout; -} - -int FBase::Exists(const char *a){ - char cnam[max_str]; - get_pathx(cnam,a); - ifstream fin(cnam,ios::in); - if(fin.is_open()){ - fin.close(); - return(1); - } - else return(0); -} - -int FBase::Exists(long n,const char *a){ - char cnam[max_str]; - get_pathx(cnam,n,a); - ifstream fin(cnam,ios::in); - if(fin.is_open()){ - fin.close(); - return(1); - } - else return(0); -} - -void FBase::mark(long ct, int ivl, const char *what){ - if(pflag&&(ct%ivl==0)){ - cout << what << " count=" << ct << endl; - } -} - -} +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "runn.h" +#include "FBase.h" + +using namespace std; +namespace iret { + +FBase::FBase(const char *typ,const char *nam){ + int lxn=strlen(typ); + type=new char[lxn+1]; + tpnm=-1; + nmnm=-1; + strcpy(type,typ); + lxn=strlen(nam); + name=new char[lxn+1]; + strcpy(name,nam); + cflag=0; + oflag=0; + pflag=get_qflag(); + eflag=1; +} + +FBase::FBase(const char *typ,int tpn,const char *nam){ + int lxn=strlen(typ); + type=new char[lxn+1]; + tpnm=tpn; + nmnm=-1; + strcpy(type,typ); + lxn=strlen(nam); + name=new char[lxn+1]; + strcpy(name,nam); + cflag=0; + oflag=0; + pflag=get_qflag(); + eflag=1; +} + +FBase::FBase(const char *typ,const char *nam,const char *pt){ + int lxn=strlen(typ); + type=new char[lxn+1]; + tpnm=-1; + nmnm=-1; + strcpy(type,typ); + lxn=strlen(nam); + name=new char[lxn+1]; + strcpy(name,nam); + cflag=0; + oflag=0; + pflag=get_qflag(); + if(*pt!=':')set_path_name(pt); + else set_path_internal(pt+1); +} + +FBase::~FBase(void){ + delete [] type; + delete [] name; +} + +void FBase::set_type_num(int tn){tpnm=tn;} + +void FBase::set_name_num(int nn){nmnm=nn;} + +void FBase::change_type(const char *typ){ + if(type!=NULL)delete [] type; + int lxn=strlen(typ); + type=new char[lxn+1]; + strcpy(type,typ); +} + +void FBase::change_name(const char *nam){ + if(name!=NULL)delete [] name; + int lxn=strlen(nam); + name=new char[lxn+1]; + strcpy(name,nam); +} + +void FBase::set_name(const char *nam){ + if(name!=NULL)delete [] name; + int lxn=strlen(nam); + name=new char[lxn+1]; + strcpy(name,nam); +} + +void FBase::subname(const char *tph,const char *tpl,const char *nm){ + char cnam[max_str]; + long i=strlen(tpl); + strcpy(cnam,tpl); + cnam[i]='_'; + cnam[i+1]='\0'; + strcat(cnam,nm); + change_type(tph); + change_name(cnam); +} + +void FBase::set_path_internal(const char *pt){ + long len; + if(pt&&(len=strlen(pt))){ + eflag=0; + path=new char[len+1]; + strcpy(path,pt); + } + else eflag=1; +} + +void FBase::set_path_name(const char *pa){ + long len; + if(pa&&(len=strlen(pa))){ + eflag=2; + pnam=new char[len+1]; + strcpy(pnam,pa); + } + else eflag=1; +} + +void FBase::map_down(FBase *pFb){ + pFb->change_type(type); + pFb->change_name(name); + pFb->set_type_num(tpnm); + pFb->set_name_num(nmnm); + pFb->pflag=pflag; + if(eflag==2)pFb->set_path_name(pnam); + else if(!eflag)pFb->set_path_internal(path); +} + +void FBase::map_down_sub(FBase *pFb,const char *subtype){ + pFb->subname(type,name,subtype); + pFb->set_type_num(tpnm); + pFb->set_name_num(nmnm); + pFb->pflag=pflag; + if(eflag==2)pFb->set_path_name(pnam); + else if(!eflag)pFb->set_path_internal(path); +} + +void FBase::get_pathx(char *nam,const char *ch){ + char cnam[256]; + ifstream fin; + + if(eflag==2){ + strcpy(cnam,"path_"); + strcat(cnam,pnam); + fin.open(cnam,ios::in); + if(!fin.is_open()){ + fin.clear(); + strcpy(cnam,"path"); + fin.open(cnam,ios::in); + if(!fin.is_open()){ + cout << "Path file for type " << type << " does not exist!" << endl; + exit(0); + } + } + fin.getline(nam,256); + fin.close(); + } + else if(eflag){ + strcpy(cnam,"path_"); + strcat(cnam,type); + strcat(cnam,"_"); + strcat(cnam,name); + strcat(cnam,"."); + strcat(cnam,ch); + fin.open(cnam,ios::in); + if(!fin.is_open()){ + fin.clear(); + strcpy(cnam,"path_"); + strcat(cnam,type); + strcat(cnam,"_"); + strcat(cnam,name); + fin.open(cnam,ios::in); + if(!fin.is_open()){ + fin.clear(); + strcpy(cnam,"path_"); + strcat(cnam,type); + fin.open(cnam,ios::in); + if(!fin.is_open()){ + fin.clear(); + strcpy(cnam,"path"); + fin.open(cnam,ios::in); + if(!fin.is_open()){ + cout << "Path file for type " << type << " does not exist!" << endl; + exit(0); + } + } + } + } + fin.getline(nam,256); + fin.close(); + } + else { + strcpy(nam,path); + } + + if(tpnm<0)strcat(nam,type); + else cat_num(type,tpnm,nam); + strcat(nam,"_"); + if(nmnm<0)strcat(nam,name); + else cat_num(name,nmnm,nam); + strcat(nam,"."); + strcat(nam,ch); +} + +void FBase::get_pathx(char *nam,long n,const char *ch){ + char cnam[256],bnam[256]; + ifstream fin; + + if(eflag==2){ + strcpy(cnam,"path_"); + strcat(cnam,pnam); + fin.open(cnam,ios::in); + if(!fin.is_open()){ + fin.clear(); + strcpy(cnam,"path"); + fin.open(cnam,ios::in); + if(!fin.is_open()){ + cout << "Path file for type " << type << " does not exist!" << endl; + exit(0); + } + } + fin.getline(nam,256); + fin.close(); + } + else if(eflag){ + strcpy(cnam,"path_"); + strcat(cnam,type); + strcat(cnam,"_"); + strcat(cnam,name); + strcat(cnam,"."); + strcat(cnam,ch); + fin.open(cnam,ios::in); + if(!fin.is_open()){ + fin.clear(); + strcpy(cnam,"path_"); + strcat(cnam,type); + strcat(cnam,"_"); + strcat(cnam,name); + fin.open(cnam,ios::in); + if(!fin.is_open()){ + fin.clear(); + strcpy(cnam,"path_"); + strcat(cnam,type); + fin.open(cnam,ios::in); + if(!fin.is_open()){ + fin.clear(); + strcpy(cnam,"path"); + fin.open(cnam,ios::in); + if(!fin.is_open()){ + cout << "Path file for type " << type << " does not exist!" << endl; + exit(0); + } + } + } + } + fin.getline(nam,256); + fin.close(); + } + else { + strcpy(nam,path); + } + + if(tpnm<0)strcat(nam,type); + else cat_num(type,tpnm,nam); + strcat(nam,"_"); + strcat(nam,add_num(name,n,bnam)); + strcat(nam,"."); + strcat(nam,ch); +} + +char *FBase::add_num(const char *ptr,long n,char *buf){ + char cnam[100]; + long_str(cnam,n); + strcpy(buf,ptr); + strcat(buf,cnam); + return(buf); +} + +char *FBase::cat_num(const char *ptr,long n,char *buf){ + char cnam[100]; + long_str(cnam,n); + strcat(buf,ptr); + strcat(buf,cnam); + return(buf); +} + +int FBase::Gcom(int sflag){ + if((cflag&sflag)&&!(oflag&sflag)){ + oflag=oflag|sflag; + return(1); + } + else return(0); +} + +int FBase::Rcom(int sflag){ + if((cflag&sflag)&&(oflag&sflag)){ + oflag=oflag&(~sflag); + return(1); + } + else return(0); +} + +ifstream *FBase::get_Istr(const char *a,ios::openmode mode){ + char cnam[max_str]; + get_pathx(cnam,a); + ifstream *pfin=new ifstream(cnam,mode); + if(pfin->is_open())return(pfin); + else { + cout << "Error: " << cnam << " failed to open!" << endl; + exit(0); + } +} + +ofstream *FBase::get_Ostr(const char *a,ios::openmode mode){ + char cnam[max_str]; + get_pathx(cnam,a); + ofstream *pfout=new ofstream(cnam,mode); + if(pfout->is_open())return(pfout); + else { + cout << "Error: " << cnam << " failed to open!" << endl; + exit(0); + } +} + +fstream *FBase::get_Fstr(const char *a,ios::openmode mode){ + char cnam[max_str]; + get_pathx(cnam,a); + fstream *pfstr=new fstream(cnam,mode); + if(pfstr->is_open())return(pfstr); + else { + cout << "Error: " << cnam << " failed to open!" << endl; + exit(0); + } +} + +ifstream *FBase::get_Istr(long n,const char *a,ios::openmode mode){ + char cnam[max_str]; + get_pathx(cnam,n,a); + ifstream *pfin=new ifstream(cnam,mode); + if(pfin->is_open())return(pfin); + else { + cout << "Error: " << cnam << " failed to open!" << endl; + exit(0); + } +} + +ofstream *FBase::get_Ostr(long n,const char *a,ios::openmode mode){ + char cnam[max_str]; + get_pathx(cnam,n,a); + ofstream *pfout=new ofstream(cnam,mode); + if(pfout->is_open())return(pfout); + else { + cout << "Error: " << cnam << " failed to open!" << endl; + exit(0); + } +} + +fstream *FBase::get_Fstr(long n,const char *a,ios::openmode mode){ + char cnam[max_str]; + get_pathx(cnam,n,a); + fstream *pfstr=new fstream(cnam,mode); + if(pfstr->is_open())return(pfstr); + else { + cout << "Error: " << cnam << " failed to open!" << endl; + exit(0); + } +} + +void FBase::dst_Istr(ifstream *pfin){ + if(!pfin)return; + if(!pfin->is_open()){ + cout << "File not open!" << endl; + exit(0); + } + delete pfin; +} + +void FBase::dst_Ostr(ofstream *pfout){ + if(!pfout)return; + if(!pfout->is_open()){ + cout << "File not open!" << endl; + exit(0); + } + delete pfout; +} + +void FBase::dst_Fstr(fstream *pfstr){ + if(!pfstr)return; + if(!pfstr->is_open()){ + cout << "File not open!" << endl; + exit(0); + } + delete pfstr; +} + +long FBase::get_Fsiz(const char *a){ + if(!Exists(a))return(0); + int fld; + struct stat datf; + char cnam[max_str]; + get_pathx(cnam,a); + fld=::open(cnam,O_RDONLY); + if(fld<=0){cout << cnam << " failed to open" << endl;exit(0);} + if(fstat(fld,&datf)){cout << cnam << " failed on size \ + determination" << endl;exit(0);} + ::close(fld); + return(datf.st_size); +} + +long FBase::get_Fsiz(long n,const char *a){ + if(!Exists(n,a))return(0); + int fld; + struct stat datf; + char cnam[max_str]; + get_pathx(cnam,n,a); + fld=::open(cnam,O_RDONLY); + if(fld<=0){cout << cnam << " failed to open" << endl;exit(0);} + if(fstat(fld,&datf)){cout << cnam << " failed on size \ + determination" << endl;exit(0);} + ::close(fld); + return(datf.st_size); +} + +char *FBase::get_Read(const char *a){ + int fld; + struct stat datf; + char cnam[max_str]; + get_pathx(cnam,a); + fld=::open(cnam,O_RDONLY); + if(fld<=0){cout << cnam << " failed to open" << endl;exit(0);} + if(fstat(fld,&datf)){cout << cnam << " failed on size \ + determination" << endl;exit(0);} + ::close(fld); + char *ptr=new char[datf.st_size]; + ifstream fin(cnam,ios::in); + if(!fin.is_open()){ + cout << "Error: " << cnam << " failed to open!" << endl; + exit(0); + } + fin.read(ptr,datf.st_size); + return(ptr); +} + +char *FBase::get_Read(long n,const char *a){ + int fld; + struct stat datf; + char cnam[max_str]; + get_pathx(cnam,n,a); + fld=::open(cnam,O_RDONLY); + if(fld<=0){cout << cnam << " failed to open" << endl;exit(0);} + if(fstat(fld,&datf)){cout << cnam << " failed on size \ + determination" << endl;exit(0);} + ::close(fld); + char *ptr=new char[datf.st_size]; + ifstream fin(cnam,ios::in); + if(!fin.is_open()){ + cout << "Error: " << cnam << " failed to open!" << endl; + exit(0); + } + fin.read(ptr,datf.st_size); + return(ptr); +} + +char *FBase::get_Mmap(const char *a){ + int fld; + struct stat datf; + char cnam[max_str]; + get_pathx(cnam,a); + fld=::open(cnam,O_RDONLY); + if(fld<=0){cout << cnam << " failed to open" << endl;exit(0);} + if(fstat(fld,&datf)){cout << cnam << " failed on size determination" << endl;exit(0);} + char *ptr=(char*)mmap(0,datf.st_size,PROT_READ,MAP_PRIVATE|MAP_NORESERVE,fld,0); + if(ptr==MAP_FAILED){cout << cnam << " failed to map" << endl;exit(0);} + ::close(fld); + return(ptr); +} + +char *FBase::get_Mmap(long n,const char *a){ + int fld; + struct stat datf; + char cnam[max_str]; + get_pathx(cnam,n,a); + fld=::open(cnam,O_RDONLY); + if(fld<=0){cout << cnam << " failed to open" << endl;exit(0);} + if(fstat(fld,&datf)){cout << cnam << " failed on size determination" << endl;exit(0);} + char *ptr=(char*)mmap(0,datf.st_size,PROT_READ,MAP_PRIVATE|MAP_NORESERVE,fld,0); + if(ptr==MAP_FAILED){cout << cnam << " failed to map" << endl;exit(0);} + ::close(fld); + return(ptr); +} + +char *FBase::get_Wmap(const char *a){ + int fld; + struct stat datf; + char cnam[max_str]; + get_pathx(cnam,a); + fld=::open(cnam,O_RDWR); + if(fld<=0){cout << cnam << " failed to open" << endl;exit(0);} + if(fstat(fld,&datf)){cout << cnam << " failed on size determination" << endl;exit(0);} + char *ptr=(char*)mmap(0,datf.st_size,PROT_READ|PROT_WRITE,MAP_SHARED,fld,0); + if(ptr==MAP_FAILED){cout << cnam << " failed to map" << endl;exit(0);} + ::close(fld); + return(ptr); +} + +char *FBase::get_Wmap(long n,const char *a){ + int fld; + struct stat datf; + char cnam[max_str]; + get_pathx(cnam,n,a); + fld=::open(cnam,O_RDWR); + if(fld<=0){cout << cnam << " failed to open" << endl;exit(0);} + if(fstat(fld,&datf)){cout << cnam << " failed on size determination" << endl;exit(0);} + char *ptr=(char*)mmap(0,datf.st_size,PROT_READ|PROT_WRITE,MAP_SHARED,fld,0); + if(ptr==MAP_FAILED){cout << cnam << " failed to map" << endl;exit(0);} + ::close(fld); + return(ptr); +} + +void FBase::dst_Mmap(const char *a,char *ptr){ + struct stat datf; + char cnam[max_str]; + if(ptr==NULL){cout << "NULL pointer" << endl;return;} + get_pathx(cnam,a); + if(stat(cnam,&datf)){cout << cnam << " failed on size determination" << endl;exit(0);} + if(munmap(ptr,datf.st_size)){cout << cnam << " failed to unmap" << endl;exit(0);} + ptr=NULL; +} + +void FBase::dst_Mmap(long n,const char *a,char *ptr){ + struct stat datf; + char cnam[max_str]; + if(ptr==NULL){cout << "NULL pointer" << endl;return;} + get_pathx(cnam,n,a); + if(stat(cnam,&datf)){cout << cnam << " failed on size determination" << endl;exit(0);} + if(munmap(ptr,datf.st_size)){cout << cnam << " failed to unmap" << endl;exit(0);} + ptr=NULL; +} + +void FBase::bin_Writ(const char *a,long nm,char *ptr){ + ofstream *pfout=get_Ostr(a,ios::out); + long k=100000,i=0; + while(i+kwrite((char*)ptr,k); + i+=k; + ptr=ptr+k; + } + pfout->write((char*)ptr,nm-i); + pfout->close(); + delete pfout; +} + +void FBase::bin_Writ(long n,const char *a,long nm,char *ptr){ + ofstream *pfout=get_Ostr(n,a,ios::out); + long k=100000,i=0; + while(i+kwrite((char*)ptr,k); + i+=k; + ptr=ptr+k; + } + pfout->write((char*)ptr,nm-i); + pfout->close(); + delete pfout; +} + +int FBase::Exists(const char *a){ + char cnam[max_str]; + get_pathx(cnam,a); + ifstream fin(cnam,ios::in); + if(fin.is_open()){ + fin.close(); + return(1); + } + else return(0); +} + +int FBase::Exists(long n,const char *a){ + char cnam[max_str]; + get_pathx(cnam,n,a); + ifstream fin(cnam,ios::in); + if(fin.is_open()){ + fin.close(); + return(1); + } + else return(0); +} + +void FBase::mark(long ct, int ivl, const char *what){ + if(pflag&&(ct%ivl==0)){ + cout << what << " count=" << ct << endl; + } +} + +} diff --git a/Library/FBase.h b/Library/FBase.h index 330446017c2ee5a9dec23d114d6732752de9ae26..12f4b0252c7b5be585bbee51739567f4b37c8fb5 100644 --- a/Library/FBase.h +++ b/Library/FBase.h @@ -1,248 +1,248 @@ -#ifndef FBASE_H -#define FBASE_H - -#include -#include - -using namespace std; -namespace iret { - -typedef char *pChr; - -class FBase { - public: - FBase(const char *tp,const char *nm); //tp is type name, nm is name - FBase(const char *tp,int tn,const char *nm); //tp is type name, if - //nonnegative tn is appended to end of tp, nm is name - FBase(const char *tp,const char *nm,const char *pt); //tp is type name, nm is name - //pt is pointer at a string sss and reads the path from file path_sss in - //current directory. But if sss begins with ':' then skips this character - //and remaining string is the path string itself. - ~FBase(); - void set_type_num(int tn); //Sets tpnm and uses if nonnegative: appended - //to end of type name - void set_name_num(int nn); //Sets nmnm and uses if nonnegative: appended - //to end of name - void change_type(const char *nm); //Allows change of type string for class. - void change_name(const char *nm); //Allows change of name string for class. - void set_name(const char *nm); //Allows change of name string for class. - //Included for compatibility - void subname(const char *tph,const char *tpl,const char *nm); //Uses the - //higher level type tph as type and combines lower level tpl_nm with - //name to allow one to keep track of file types. - void set_path_internal(const char *pt); //Path is by default external with - //eflag=1. But if this function called with nonempty string, then eflag=0 - //and pt stored in path and used for access to data. - void set_path_name(const char *pa); //path will be extracted from path_pa - //and eflag=2. Naming conventions for files are unchanged - void map_down(FBase *pFb); //Maps naming parameters to class instance pFb - void map_down_sub(FBase *pFb,const char *subtype); //Maps naming parameters to class instance pFb - //combines subtype with name to make a new name for pFb and type becomes its type - - //Path access functions - void get_pathx(char *cn,const char *a); - //Reads the path from a file "path_(*name)" and constructs the - //file name from as "(*type)_(*name).(*a)". Cats path and file - //name and returns the full info in cn. - void get_pathx(char *cn,long n,const char *a); - char *add_num(const char *ptr,long n,char *buf); //converts long to ascii - //and cats to end of string and returns pointer to new string - //that results. Does not change input string. The new string is - //held in buffer space and this is overwritten at each call. - char *cat_num(const char *ptr,long n,char *buf); //converts long to ascii - //and cats to end of ptr string and then cats result to end of - //whatever is in buffer. Does not change input string. The new string is - //held in buffer space. - - //Stream object pointers - ifstream *get_Istr(const char *a,ios::openmode m=ios::in); - //Opens input file stream by path and name composition. - ofstream *get_Ostr(const char *a,ios::openmode m=ios::out); - //Opens output file stream by path and name composition. - fstream *get_Fstr(const char *a,ios::openmode m=ios::in|ios::out); - //Opens output file stream by path and name composition. - ifstream *get_Istr(long n,const char *a,ios::openmode m=ios::in); - ofstream *get_Ostr(long n,const char *a,ios::openmode m=ios::out); - fstream *get_Fstr(long n,const char *a,ios::openmode m=ios::in|ios::out); - void dst_Istr(ifstream *pfin); - void dst_Ostr(ofstream *pfout); - void dst_Fstr(fstream *pfstr); - - //Get file size in bytes - long get_Fsiz(const char *a); - long get_Fsiz(long n,const char *a); - - //File existence - int Exists(const char *a); //returns 1 if file exists - int Exists(long n,const char *a); //returns 1 if file exists - - //Read in array pointers - char *get_Read(const char *a); - //Reads in a file into an char array and returns pointer - char *get_Read(long n,const char *a); - - //Memory map pointers - char *get_Mmap(const char *a); - //Memory maps file by path and name composition. - char *get_Mmap(long n,const char *a); - char *get_Wmap(const char *a); - //Memory maps file by path and name composition. - //Allows to modify contents and is written out when dst_Mmap called - char *get_Wmap(long n,const char *a); - //Allows to modify contents and is written out when dst_Mmap called - void dst_Mmap(const char *a,char *ptr); - //Removes the memory map for ptr based on path and name composition. - void dst_Mmap(long n,const char *a,char *ptr); - - //Array of chars and binary write - void bin_Writ(const char *a,long nm,char *ptr); - //Writes out nm bytes binary - void bin_Writ(long n,const char *a,long nm,char *ptr); - - //Write and read 1, 2, or 3 long integers to or from a file - template - void get_Nnum(const char *a,X &m1); - template - void get_Nnum(const char *a,X &m1,Y &m2); - template - void get_Nnum(const char *a,X &m1,Y &m2,Z &m3); - template - void get_Nnum(long n,const char *a,X &m1); - template - void get_Nnum(long n,const char *a,X &m1,Y &m2); - template - void get_Nnum(long n,const char *a,X &m1,Y &m2,Z &m3); - template - void put_Nnum(const char *a,X &m1); - template - void put_Nnum(const char *a,X &m1,Y &m2); - template - void put_Nnum(const char *a,X &m1,Y &m2,Z &m3); - template - void put_Nnum(long n,const char *a,X &m1); - template - void put_Nnum(long n,const char *a,X &m1,Y &m2); - template - void put_Nnum(long n,const char *a,X &m1,Y &m2,Z &m3); - - //Logical accounting functions - int Gcom(int sflag); //sflag is bit marker such as READ_W, etc. - //This returns 1 if sflag bit not set in oflag and is in cflag - //If this is the case then it sets sflag in oflag. - int Rcom(int sflag); - //This returns 1 if sflag bit set in oflag and in cflag - //If this is the case then it turns off sflag in oflag. - void mark(long,int,const char*); - //This function prints out string in 3rd argument and count - //if first argument is multiple of the second - -//Data - int cflag; //Command, what should happen to resources. - int oflag; //Bit string status of resources, 1 open, 0 closed. - int open1; //flags to mark whether a resource is open or not - int open2; //0 means closed, 1 means open - int open3; //Used for those resources that are either completely - int open4; //closed or completely open. - int open5; - char *type; - int tpnm; //If nonnegative integer it is appended to end of type - //in constructing file name - char *name; - int nmnm; //If nonnegative integer it is appended to end of name - //in constructing file name - int pflag; //Usual print flag, 1 for verbose output, 0 for none - //Print flag set to 1 by default. - int eflag; //Flag set to 1 for external path from path file, 0 - //for internal path - char *path; //Path stored here if eflag=0. - char *pnam; //Path extension stored here if eflag=2. -}; - -//Template functions - -template -void FBase::get_Nnum(const char *a,X &m1){ - ifstream *pfin=get_Istr(a,ios::in); - *pfin >> m1; - dst_Istr(pfin); -} - -template -void FBase::get_Nnum(const char *a,X &m1,Y &m2){ - ifstream *pfin=get_Istr(a,ios::in); - *pfin >> m1 >> m2; - dst_Istr(pfin); -} - -template -void FBase::get_Nnum(const char *a,X &m1,Y &m2,Z &m3){ - ifstream *pfin=get_Istr(a,ios::in); - *pfin >> m1 >> m2 >> m3; - dst_Istr(pfin); -} - -template -void FBase::get_Nnum(long n,const char *a,X &m1){ - ifstream *pfin=get_Istr(n,a,ios::in); - *pfin >> m1; - dst_Istr(pfin); -} - -template -void FBase::get_Nnum(long n,const char *a,X &m1,Y &m2){ - ifstream *pfin=get_Istr(n,a,ios::in); - *pfin >> m1 >> m2; - dst_Istr(pfin); -} - -template -void FBase::get_Nnum(long n,const char *a,X &m1,Y &m2,Z &m3){ - ifstream *pfin=get_Istr(n,a,ios::in); - *pfin >> m1 >> m2 >> m3; - dst_Istr(pfin); -} - -template -void FBase::put_Nnum(const char *a,X &m1){ - ofstream *pfout=get_Ostr(a,ios::out); - *pfout << m1 << endl; - dst_Ostr(pfout); -} - -template -void FBase::put_Nnum(const char *a,X &m1,Y &m2){ - ofstream *pfout=get_Ostr(a,ios::out); - *pfout << m1 << " " << m2 << endl; - dst_Ostr(pfout); -} - -template -void FBase::put_Nnum(const char *a,X &m1,Y &m2,Z &m3){ - ofstream *pfout=get_Ostr(a,ios::out); - *pfout << m1 << " " << m2 << " " << m3 << endl; - dst_Ostr(pfout); -} - -template -void FBase::put_Nnum(long n,const char *a,X &m1){ - ofstream *pfout=get_Ostr(n,a,ios::out); - *pfout << m1 << endl; - dst_Ostr(pfout); -} - -template -void FBase::put_Nnum(long n,const char *a,X &m1,Y &m2){ - ofstream *pfout=get_Ostr(n,a,ios::out); - *pfout << m1 << " " << m2 << endl; - dst_Ostr(pfout); -} - -template -void FBase::put_Nnum(long n,const char *a,X &m1,Y &m2,Z &m3){ - ofstream *pfout=get_Ostr(n,a,ios::out); - *pfout << m1 << " " << m2 << " " << m3 << endl; - dst_Ostr(pfout); -} - -} -#endif +#ifndef FBASE_H +#define FBASE_H + +#include +#include + +using namespace std; +namespace iret { + +typedef char *pChr; + +class FBase { + public: + FBase(const char *tp,const char *nm); //tp is type name, nm is name + FBase(const char *tp,int tn,const char *nm); //tp is type name, if + //nonnegative tn is appended to end of tp, nm is name + FBase(const char *tp,const char *nm,const char *pt); //tp is type name, nm is name + //pt is pointer at a string sss and reads the path from file path_sss in + //current directory. But if sss begins with ':' then skips this character + //and remaining string is the path string itself. + ~FBase(); + void set_type_num(int tn); //Sets tpnm and uses if nonnegative: appended + //to end of type name + void set_name_num(int nn); //Sets nmnm and uses if nonnegative: appended + //to end of name + void change_type(const char *nm); //Allows change of type string for class. + void change_name(const char *nm); //Allows change of name string for class. + void set_name(const char *nm); //Allows change of name string for class. + //Included for compatibility + void subname(const char *tph,const char *tpl,const char *nm); //Uses the + //higher level type tph as type and combines lower level tpl_nm with + //name to allow one to keep track of file types. + void set_path_internal(const char *pt); //Path is by default external with + //eflag=1. But if this function called with nonempty string, then eflag=0 + //and pt stored in path and used for access to data. + void set_path_name(const char *pa); //path will be extracted from path_pa + //and eflag=2. Naming conventions for files are unchanged + void map_down(FBase *pFb); //Maps naming parameters to class instance pFb + void map_down_sub(FBase *pFb,const char *subtype); //Maps naming parameters to class instance pFb + //combines subtype with name to make a new name for pFb and type becomes its type + + //Path access functions + void get_pathx(char *cn,const char *a); + //Reads the path from a file "path_(*name)" and constructs the + //file name from as "(*type)_(*name).(*a)". Cats path and file + //name and returns the full info in cn. + void get_pathx(char *cn,long n,const char *a); + char *add_num(const char *ptr,long n,char *buf); //converts long to ascii + //and cats to end of string and returns pointer to new string + //that results. Does not change input string. The new string is + //held in buffer space and this is overwritten at each call. + char *cat_num(const char *ptr,long n,char *buf); //converts long to ascii + //and cats to end of ptr string and then cats result to end of + //whatever is in buffer. Does not change input string. The new string is + //held in buffer space. + + //Stream object pointers + ifstream *get_Istr(const char *a,ios::openmode m=ios::in); + //Opens input file stream by path and name composition. + ofstream *get_Ostr(const char *a,ios::openmode m=ios::out); + //Opens output file stream by path and name composition. + fstream *get_Fstr(const char *a,ios::openmode m=ios::in|ios::out); + //Opens output file stream by path and name composition. + ifstream *get_Istr(long n,const char *a,ios::openmode m=ios::in); + ofstream *get_Ostr(long n,const char *a,ios::openmode m=ios::out); + fstream *get_Fstr(long n,const char *a,ios::openmode m=ios::in|ios::out); + void dst_Istr(ifstream *pfin); + void dst_Ostr(ofstream *pfout); + void dst_Fstr(fstream *pfstr); + + //Get file size in bytes + long get_Fsiz(const char *a); + long get_Fsiz(long n,const char *a); + + //File existence + int Exists(const char *a); //returns 1 if file exists + int Exists(long n,const char *a); //returns 1 if file exists + + //Read in array pointers + char *get_Read(const char *a); + //Reads in a file into an char array and returns pointer + char *get_Read(long n,const char *a); + + //Memory map pointers + char *get_Mmap(const char *a); + //Memory maps file by path and name composition. + char *get_Mmap(long n,const char *a); + char *get_Wmap(const char *a); + //Memory maps file by path and name composition. + //Allows to modify contents and is written out when dst_Mmap called + char *get_Wmap(long n,const char *a); + //Allows to modify contents and is written out when dst_Mmap called + void dst_Mmap(const char *a,char *ptr); + //Removes the memory map for ptr based on path and name composition. + void dst_Mmap(long n,const char *a,char *ptr); + + //Array of chars and binary write + void bin_Writ(const char *a,long nm,char *ptr); + //Writes out nm bytes binary + void bin_Writ(long n,const char *a,long nm,char *ptr); + + //Write and read 1, 2, or 3 long integers to or from a file + template + void get_Nnum(const char *a,X &m1); + template + void get_Nnum(const char *a,X &m1,Y &m2); + template + void get_Nnum(const char *a,X &m1,Y &m2,Z &m3); + template + void get_Nnum(long n,const char *a,X &m1); + template + void get_Nnum(long n,const char *a,X &m1,Y &m2); + template + void get_Nnum(long n,const char *a,X &m1,Y &m2,Z &m3); + template + void put_Nnum(const char *a,X &m1); + template + void put_Nnum(const char *a,X &m1,Y &m2); + template + void put_Nnum(const char *a,X &m1,Y &m2,Z &m3); + template + void put_Nnum(long n,const char *a,X &m1); + template + void put_Nnum(long n,const char *a,X &m1,Y &m2); + template + void put_Nnum(long n,const char *a,X &m1,Y &m2,Z &m3); + + //Logical accounting functions + int Gcom(int sflag); //sflag is bit marker such as READ_W, etc. + //This returns 1 if sflag bit not set in oflag and is in cflag + //If this is the case then it sets sflag in oflag. + int Rcom(int sflag); + //This returns 1 if sflag bit set in oflag and in cflag + //If this is the case then it turns off sflag in oflag. + void mark(long,int,const char*); + //This function prints out string in 3rd argument and count + //if first argument is multiple of the second + +//Data + int cflag; //Command, what should happen to resources. + int oflag; //Bit string status of resources, 1 open, 0 closed. + int open1; //flags to mark whether a resource is open or not + int open2; //0 means closed, 1 means open + int open3; //Used for those resources that are either completely + int open4; //closed or completely open. + int open5; + char *type; + int tpnm; //If nonnegative integer it is appended to end of type + //in constructing file name + char *name; + int nmnm; //If nonnegative integer it is appended to end of name + //in constructing file name + int pflag; //Usual print flag, 1 for verbose output, 0 for none + //Print flag set to 1 by default. + int eflag; //Flag set to 1 for external path from path file, 0 + //for internal path + char *path; //Path stored here if eflag=0. + char *pnam; //Path extension stored here if eflag=2. +}; + +//Template functions + +template +void FBase::get_Nnum(const char *a,X &m1){ + ifstream *pfin=get_Istr(a,ios::in); + *pfin >> m1; + dst_Istr(pfin); +} + +template +void FBase::get_Nnum(const char *a,X &m1,Y &m2){ + ifstream *pfin=get_Istr(a,ios::in); + *pfin >> m1 >> m2; + dst_Istr(pfin); +} + +template +void FBase::get_Nnum(const char *a,X &m1,Y &m2,Z &m3){ + ifstream *pfin=get_Istr(a,ios::in); + *pfin >> m1 >> m2 >> m3; + dst_Istr(pfin); +} + +template +void FBase::get_Nnum(long n,const char *a,X &m1){ + ifstream *pfin=get_Istr(n,a,ios::in); + *pfin >> m1; + dst_Istr(pfin); +} + +template +void FBase::get_Nnum(long n,const char *a,X &m1,Y &m2){ + ifstream *pfin=get_Istr(n,a,ios::in); + *pfin >> m1 >> m2; + dst_Istr(pfin); +} + +template +void FBase::get_Nnum(long n,const char *a,X &m1,Y &m2,Z &m3){ + ifstream *pfin=get_Istr(n,a,ios::in); + *pfin >> m1 >> m2 >> m3; + dst_Istr(pfin); +} + +template +void FBase::put_Nnum(const char *a,X &m1){ + ofstream *pfout=get_Ostr(a,ios::out); + *pfout << m1 << endl; + dst_Ostr(pfout); +} + +template +void FBase::put_Nnum(const char *a,X &m1,Y &m2){ + ofstream *pfout=get_Ostr(a,ios::out); + *pfout << m1 << " " << m2 << endl; + dst_Ostr(pfout); +} + +template +void FBase::put_Nnum(const char *a,X &m1,Y &m2,Z &m3){ + ofstream *pfout=get_Ostr(a,ios::out); + *pfout << m1 << " " << m2 << " " << m3 << endl; + dst_Ostr(pfout); +} + +template +void FBase::put_Nnum(long n,const char *a,X &m1){ + ofstream *pfout=get_Ostr(n,a,ios::out); + *pfout << m1 << endl; + dst_Ostr(pfout); +} + +template +void FBase::put_Nnum(long n,const char *a,X &m1,Y &m2){ + ofstream *pfout=get_Ostr(n,a,ios::out); + *pfout << m1 << " " << m2 << endl; + dst_Ostr(pfout); +} + +template +void FBase::put_Nnum(long n,const char *a,X &m1,Y &m2,Z &m3){ + ofstream *pfout=get_Ostr(n,a,ios::out); + *pfout << m1 << " " << m2 << " " << m3 << endl; + dst_Ostr(pfout); +} + +} +#endif diff --git a/Library/Hash.C b/Library/Hash.C index 47355c101981950036def7c391ad3dc35d7d475a..1e8ed26f39f87566c8a3aedf03d2fabf5b6aa99b 100644 --- a/Library/Hash.C +++ b/Library/Hash.C @@ -1,733 +1,733 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "runn.h" -#include "Hash.h" - -using namespace std; -namespace iret { - -Hash::Hash(void) : FBase("hshset","null"){ -} - -Hash::Hash(const char *nam) : FBase("hshset",nam){ -} - -Hash::Hash(int n,const char *nam) : FBase("hshset",n,nam){ -} - -Hash::~Hash(){ -} - -void Hash::create_htable(List &Lst,int excess){ - char cnam[max_str],*cptr,*uptr; - int u,len; - long ct,i,j,k; - ofstream *pfout; - - nwrds=Lst.cnt_key; - ct=nwrds; - tnum=1; - u=0; - while(ct=ct/2){tnum*=2;u++;} - if(u>30){cout << "Error in size, " << u << endl;exit(0);} - i=0; - while((u<32)&&(i30){cout << "Error in size, " << u << endl;exit(0);} - i=0; - while((u<32)&&(i30){cout << "Error in size, " << u << endl;exit(0);} - i=0; - while((u<32)&&(i> nwrds >> tnum >> asize; - dst_Istr(pfin); - - harr=(long*)get_Mmap("ha"); - addr=(long*)get_Mmap("ad"); - strmap=get_Mmap("str"); - - farr=new long[1536]; - ct=1; - for(i=0;i<1536;i++){ - farr[i]=ct=(ct*331)&tnum; - } - - px0=farr,px1=farr+128,px2=farr+256; - px3=farr+384,px4=farr+512,px5=farr+640; - px6=farr+768,px7=farr+896,px8=farr+1024; - px9=farr+1152,px10=farr+1280,px11=farr+1408; -} - -void Hash::gopen_htable_map(int mz){ - char cnam[max_str],*cptr; - int fld; - long ct,asize,i; - - ifstream *pfin=get_Istr(mz,"nm"); - *pfin >> nwrds >> tnum >> asize; - dst_Istr(pfin); - - harr=(long*)get_Mmap(mz,"ha"); - addr=(long*)get_Mmap(mz,"ad"); - strmap=get_Mmap(mz,"str"); - - farr=new long[1536]; - ct=1; - for(i=0;i<1536;i++){ - farr[i]=ct=(ct*331)&tnum; - } - - px0=farr,px1=farr+128,px2=farr+256; - px3=farr+384,px4=farr+512,px5=farr+640; - px6=farr+768,px7=farr+896,px8=farr+1024; - px9=farr+1152,px10=farr+1280,px11=farr+1408; -} - -void Hash::gopen_htable_copy(Hash *pH){ - char cnam[max_str],*cptr; - int fld; - long ct,asize,i; - - nwrds=pH->nwrds; - tnum=pH->tnum; - - harr=pH->harr; - addr=pH->addr; - strmap=pH->strmap; - - farr=pH->farr; - - px0=farr,px1=farr+128,px2=farr+256; - px3=farr+384,px4=farr+512,px5=farr+640; - px6=farr+768,px7=farr+896,px8=farr+1024; - px9=farr+1152,px10=farr+1280,px11=farr+1408; -} - -long Hash::find(const char *str){ - register long ct=0,i=0,k; - register int ic; - register const char *utr=str; - while(ic=*(utr++)){ - switch(i){ - case 0: ct+=*(px0+ic); - break; - case 1: ct+=*(px1+ic); - break; - case 2: ct+=*(px2+ic); - break; - case 3: ct+=*(px3+ic); - break; - case 4: ct+=*(px4+ic); - break; - case 5: ct+=*(px5+ic); - break; - case 6: ct+=*(px6+ic); - break; - case 7: ct+=*(px7+ic); - break; - case 8: ct+=*(px8+ic); - break; - case 9: ct+=*(px9+ic); - break; - case 10: ct+=*(px10+ic); - break; - case 11: ct+=*(px11+ic); - i-=12; - break; - } - i++; - } - k=ct&tnum; - ct=harr[k+1]; - i=harr[k]; -//cout << k << " " << i << " " << addr[i] << " " << ct << " " << addr[ct] << endl; - switch(ct-i){ - case 0: return(0); - break; - case 1: if(!strcmp(str,strmap+addr[i]))return(i+1); - else return(0); - break; - case 2: ic=strcmp(str,strmap+addr[i]); - if(ic>0){ - if(!strcmp(str,strmap+addr[i+1]))return(i+2); - else return(0); - } - else if(ic<0)return(0); - else return(i+1); - break; - default: ic=strcmp(str,strmap+addr[i]); - if(ic<0)return(0); - else if(!ic)return(i+1); - ct--; - ic=strcmp(str,strmap+addr[ct]); - if(ic>0)return(0); - else if(!ic)return(ct+1); - while(ct-i>1){ - k=(ct+i)/2; - ic=strcmp(str,strmap+addr[k]); - if(ic>0)i=k; - else if(ic<0)ct=k; - else return(k+1); - } - return(0); - } -} - -void Hash::gclose_htable_map(void){ - dst_Mmap("ha",(char*)harr); - dst_Mmap("ad",(char*)addr); - dst_Mmap("str",strmap); - delete [] farr; -} - -void Hash::gclose_htable_map(int mz){ - dst_Mmap(mz,"ha",(char*)harr); - dst_Mmap(mz,"ad",(char*)addr); - dst_Mmap(mz,"str",strmap); - delete [] farr; -} - -//Chash code - -Chash::Chash() : Hash(){ - change_type("cshset"); -} - -Chash::Chash(const char *str) : Hash(str){ - change_type("cshset"); -} - -Chash::Chash(int n,const char *str) : Hash(n,str){ - change_type("cshset"); -} - -Chash::~Chash(void){} - -void Chash::create_ctable(Count &Ct,int excess){ - create_htable(Ct,excess); - gopen_htable_map(); - long n,i=0; - long *pct=new long[Ct.cnt_key]; - Ct.node_first(); - while(Ct.node_next()){ - if(n=find(Ct.show_str())){ - pct[n-1]=Ct.count(); - } - else { - cout << "Error in Count tree!" << endl;exit(0); - } - mark(++i,10000,"count terms"); - } - bin_Writ("ct",Ct.cnt_key*sizeof(long),(char*)pct); - delete [] pct; - cnt=(long*)get_Mmap("ct"); - gclose_htable_map(); -} - -void Chash::create_ctable(List &Lt,int excess){ - create_htable(Lt,excess); - gopen_htable_map(); - long n,i=1; - long *pct=new long[Lt.cnt_key]; - Lt.node_first(); - while(Lt.node_next()){ - if(n=find(Lt.show_str())){ - pct[n-1]=i; - } - else { - cout << "Error in List tree!" << endl;exit(0); - } - mark(++i,10000,"count terms"); - } - bin_Writ("ct",Lt.cnt_key*sizeof(long),(char*)pct); - delete [] pct; - cnt=(long*)get_Mmap("ct"); - gclose_htable_map(); -} - -void Chash::create_ctable(int mz,Count &Ct,int excess){ - create_htable(mz,Ct,excess); - gopen_htable_map(mz); - long n,i=0; - long *pct=new long[Ct.cnt_key]; - Ct.node_first(); - while(Ct.node_next()){ - if(n=find(Ct.show_str())){ - pct[n-1]=Ct.count(); - } - else { - cout << "Error in Count tree!" << endl;exit(0); - } - mark(++i,10000,"count terms"); - } - bin_Writ(mz,"ct",Ct.cnt_key*sizeof(long),(char*)pct); - delete [] pct; - cnt=(long*)get_Mmap(mz,"ct"); - gclose_htable_map(mz); -} - -void Chash::create_ctable(int mz,List &Lt,int excess){ - create_htable(mz,Lt,excess); - gopen_htable_map(mz); - long n,i=1; - long *pct=new long[Lt.cnt_key]; - Lt.node_first(); - while(Lt.node_next()){ - if(n=find(Lt.show_str())){ - pct[n-1]=i; - } - else { - cout << "Error in List tree!" << endl;exit(0); - } - mark(++i,10000,"count terms"); - } - bin_Writ(mz,"ct",Lt.cnt_key*sizeof(long),(char*)pct); - delete [] pct; - cnt=(long*)get_Mmap(mz,"ct"); - gclose_htable_map(mz); -} - -void Chash::gopen_ctable_map(void){ - gopen_htable_map(); - cnt=(long*)get_Mmap("ct"); -} - -void Chash::gopen_ctable_map(int mz){ - gopen_htable_map(mz); - cnt=(long*)get_Mmap(mz,"ct"); -} - -void Chash::gclose_ctable_map(void){ - gclose_htable_map(); - dst_Mmap("ct",(char*)cnt); -} - -void Chash::gclose_ctable_map(int mz){ - gclose_htable_map(mz); - dst_Mmap(mz,"ct",(char*)cnt); -} - -long Chash::count(const char *str){ - long n=find(str); - if(n)return(cnt[n-1]); - else return(0); -} - -} - +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "runn.h" +#include "Hash.h" + +using namespace std; +namespace iret { + +Hash::Hash(void) : FBase("hshset","null"){ +} + +Hash::Hash(const char *nam) : FBase("hshset",nam){ +} + +Hash::Hash(int n,const char *nam) : FBase("hshset",n,nam){ +} + +Hash::~Hash(){ +} + +void Hash::create_htable(List &Lst,int excess){ + char cnam[max_str],*cptr,*uptr; + int u,len; + long ct,i,j,k; + ofstream *pfout; + + nwrds=Lst.cnt_key; + ct=nwrds; + tnum=1; + u=0; + while(ct=ct/2){tnum*=2;u++;} + if(u>30){cout << "Error in size, " << u << endl;exit(0);} + i=0; + while((u<32)&&(i30){cout << "Error in size, " << u << endl;exit(0);} + i=0; + while((u<32)&&(i30){cout << "Error in size, " << u << endl;exit(0);} + i=0; + while((u<32)&&(i> nwrds >> tnum >> asize; + dst_Istr(pfin); + + harr=(long*)get_Mmap("ha"); + addr=(long*)get_Mmap("ad"); + strmap=get_Mmap("str"); + + farr=new long[1536]; + ct=1; + for(i=0;i<1536;i++){ + farr[i]=ct=(ct*331)&tnum; + } + + px0=farr,px1=farr+128,px2=farr+256; + px3=farr+384,px4=farr+512,px5=farr+640; + px6=farr+768,px7=farr+896,px8=farr+1024; + px9=farr+1152,px10=farr+1280,px11=farr+1408; +} + +void Hash::gopen_htable_map(int mz){ + char cnam[max_str],*cptr; + int fld; + long ct,asize,i; + + ifstream *pfin=get_Istr(mz,"nm"); + *pfin >> nwrds >> tnum >> asize; + dst_Istr(pfin); + + harr=(long*)get_Mmap(mz,"ha"); + addr=(long*)get_Mmap(mz,"ad"); + strmap=get_Mmap(mz,"str"); + + farr=new long[1536]; + ct=1; + for(i=0;i<1536;i++){ + farr[i]=ct=(ct*331)&tnum; + } + + px0=farr,px1=farr+128,px2=farr+256; + px3=farr+384,px4=farr+512,px5=farr+640; + px6=farr+768,px7=farr+896,px8=farr+1024; + px9=farr+1152,px10=farr+1280,px11=farr+1408; +} + +void Hash::gopen_htable_copy(Hash *pH){ + char cnam[max_str],*cptr; + int fld; + long ct,asize,i; + + nwrds=pH->nwrds; + tnum=pH->tnum; + + harr=pH->harr; + addr=pH->addr; + strmap=pH->strmap; + + farr=pH->farr; + + px0=farr,px1=farr+128,px2=farr+256; + px3=farr+384,px4=farr+512,px5=farr+640; + px6=farr+768,px7=farr+896,px8=farr+1024; + px9=farr+1152,px10=farr+1280,px11=farr+1408; +} + +long Hash::find(const char *str){ + register long ct=0,i=0,k; + register int ic; + register const char *utr=str; + while(ic=*(utr++)){ + switch(i){ + case 0: ct+=*(px0+ic); + break; + case 1: ct+=*(px1+ic); + break; + case 2: ct+=*(px2+ic); + break; + case 3: ct+=*(px3+ic); + break; + case 4: ct+=*(px4+ic); + break; + case 5: ct+=*(px5+ic); + break; + case 6: ct+=*(px6+ic); + break; + case 7: ct+=*(px7+ic); + break; + case 8: ct+=*(px8+ic); + break; + case 9: ct+=*(px9+ic); + break; + case 10: ct+=*(px10+ic); + break; + case 11: ct+=*(px11+ic); + i-=12; + break; + } + i++; + } + k=ct&tnum; + ct=harr[k+1]; + i=harr[k]; +//cout << k << " " << i << " " << addr[i] << " " << ct << " " << addr[ct] << endl; + switch(ct-i){ + case 0: return(0); + break; + case 1: if(!strcmp(str,strmap+addr[i]))return(i+1); + else return(0); + break; + case 2: ic=strcmp(str,strmap+addr[i]); + if(ic>0){ + if(!strcmp(str,strmap+addr[i+1]))return(i+2); + else return(0); + } + else if(ic<0)return(0); + else return(i+1); + break; + default: ic=strcmp(str,strmap+addr[i]); + if(ic<0)return(0); + else if(!ic)return(i+1); + ct--; + ic=strcmp(str,strmap+addr[ct]); + if(ic>0)return(0); + else if(!ic)return(ct+1); + while(ct-i>1){ + k=(ct+i)/2; + ic=strcmp(str,strmap+addr[k]); + if(ic>0)i=k; + else if(ic<0)ct=k; + else return(k+1); + } + return(0); + } +} + +void Hash::gclose_htable_map(void){ + dst_Mmap("ha",(char*)harr); + dst_Mmap("ad",(char*)addr); + dst_Mmap("str",strmap); + delete [] farr; +} + +void Hash::gclose_htable_map(int mz){ + dst_Mmap(mz,"ha",(char*)harr); + dst_Mmap(mz,"ad",(char*)addr); + dst_Mmap(mz,"str",strmap); + delete [] farr; +} + +//Chash code + +Chash::Chash() : Hash(){ + change_type("cshset"); +} + +Chash::Chash(const char *str) : Hash(str){ + change_type("cshset"); +} + +Chash::Chash(int n,const char *str) : Hash(n,str){ + change_type("cshset"); +} + +Chash::~Chash(void){} + +void Chash::create_ctable(Count &Ct,int excess){ + create_htable(Ct,excess); + gopen_htable_map(); + long n,i=0; + long *pct=new long[Ct.cnt_key]; + Ct.node_first(); + while(Ct.node_next()){ + if(n=find(Ct.show_str())){ + pct[n-1]=Ct.count(); + } + else { + cout << "Error in Count tree!" << endl;exit(0); + } + mark(++i,10000,"count terms"); + } + bin_Writ("ct",Ct.cnt_key*sizeof(long),(char*)pct); + delete [] pct; + cnt=(long*)get_Mmap("ct"); + gclose_htable_map(); +} + +void Chash::create_ctable(List &Lt,int excess){ + create_htable(Lt,excess); + gopen_htable_map(); + long n,i=1; + long *pct=new long[Lt.cnt_key]; + Lt.node_first(); + while(Lt.node_next()){ + if(n=find(Lt.show_str())){ + pct[n-1]=i; + } + else { + cout << "Error in List tree!" << endl;exit(0); + } + mark(++i,10000,"count terms"); + } + bin_Writ("ct",Lt.cnt_key*sizeof(long),(char*)pct); + delete [] pct; + cnt=(long*)get_Mmap("ct"); + gclose_htable_map(); +} + +void Chash::create_ctable(int mz,Count &Ct,int excess){ + create_htable(mz,Ct,excess); + gopen_htable_map(mz); + long n,i=0; + long *pct=new long[Ct.cnt_key]; + Ct.node_first(); + while(Ct.node_next()){ + if(n=find(Ct.show_str())){ + pct[n-1]=Ct.count(); + } + else { + cout << "Error in Count tree!" << endl;exit(0); + } + mark(++i,10000,"count terms"); + } + bin_Writ(mz,"ct",Ct.cnt_key*sizeof(long),(char*)pct); + delete [] pct; + cnt=(long*)get_Mmap(mz,"ct"); + gclose_htable_map(mz); +} + +void Chash::create_ctable(int mz,List &Lt,int excess){ + create_htable(mz,Lt,excess); + gopen_htable_map(mz); + long n,i=1; + long *pct=new long[Lt.cnt_key]; + Lt.node_first(); + while(Lt.node_next()){ + if(n=find(Lt.show_str())){ + pct[n-1]=i; + } + else { + cout << "Error in List tree!" << endl;exit(0); + } + mark(++i,10000,"count terms"); + } + bin_Writ(mz,"ct",Lt.cnt_key*sizeof(long),(char*)pct); + delete [] pct; + cnt=(long*)get_Mmap(mz,"ct"); + gclose_htable_map(mz); +} + +void Chash::gopen_ctable_map(void){ + gopen_htable_map(); + cnt=(long*)get_Mmap("ct"); +} + +void Chash::gopen_ctable_map(int mz){ + gopen_htable_map(mz); + cnt=(long*)get_Mmap(mz,"ct"); +} + +void Chash::gclose_ctable_map(void){ + gclose_htable_map(); + dst_Mmap("ct",(char*)cnt); +} + +void Chash::gclose_ctable_map(int mz){ + gclose_htable_map(mz); + dst_Mmap(mz,"ct",(char*)cnt); +} + +long Chash::count(const char *str){ + long n=find(str); + if(n)return(cnt[n-1]); + else return(0); +} + +} + diff --git a/Library/Hash.h b/Library/Hash.h index 4b4d7d2aaf2382add9876eb79d1458529bf475e3..96f94fd023f297aec77338900c554f0249bc9e01 100644 --- a/Library/Hash.h +++ b/Library/Hash.h @@ -1,92 +1,92 @@ -#ifndef HASH_H -#define HASH_H - -#include -#include -#include -#include - -namespace iret { - -class Hash : public FBase { -public: - Hash(void); - Hash(const char *nm); - Hash(int n,const char *nm); //n gets appended to type if >-1 - ~Hash(); - - void create_htable(List &Lst,int excess); //"str" for file of strings, - //"ad" for address file, "nm" numbers, - //"ha" hash array. Excess is # powers of 2 above size. - void create_htableM(List &Lst,int excess); //creates in memory ready for use - //and no need to call gopen or gclose functions - void create_htable(int mz,List &Lst,int excess); //"str" for file of strings, - //Creates a numbered version of above - - void gopen_htable_map(void); //Creates memory maps - void gopen_htable_map(int mz); //Creates memory maps - void gclose_htable_map(void); //Destroys memory maps - //and deletes memory - void gclose_htable_map(int mz); //Destroys memory maps - //and deletes memory - void gopen_htable_copy(Hash *pH); //Copies memory maps - - long find(const char *str); //Return number+1 if present, else 0. - //Number is not lexical order but hash order and then lexical - //within collesion groups. - - //Data - char *strmap; //Holds the bit map. - long *addr; //Holds the offsets to strmap. - long nwrds; //Number of words. - long tnum; //Truncation number, size of har. - long *harr; //Holds hash array. - long *farr; //Holds the hash coefficients. - long *px0; - long *px1; - long *px2; - long *px3; - long *px4; - long *px5; - long *px6; - long *px7; - long *px8; - long *px9; - long *px10; - long *px11; -}; - -class Chash : public Hash { -public: - Chash(void); - Chash(const char *nm); - Chash(int n,const char *nm); //n gets appended to type if >-1 - ~Chash(void); - - void create_ctable(Count &Ct,int excess); //Adds "ct" for counts - //Calls create_htable and then prodoces the array of counts. - void create_ctable(int mz,Count &Ct,int excess); //Adds "ct" for counts - //Creates a numbered version of above - void create_ctable(List &Lt,int excess); //Adds "ct" for term # - //and starts the count at 1 and in lexical order. count() will - //return 0 if term not in list. - void create_ctable(int mz,List &Lt,int excess); //Adds "ct" for term # - //Creates a numbered version of above - - void gopen_ctable_map(void); //Calls gopen_htable_map and also - //maps "ct" file. - void gopen_ctable_map(int mz); //Calls gopen_htable_map and also - //maps "ct" file. - void gclose_ctable_map(void); //Calls gclose_htable_map and also - //Unmaps "ct" file. - void gclose_ctable_map(int mz); //Calls gclose_htable_map and also - //Unmaps "ct" file. - - long count(const char *str); //Returns count if present, else 0. - - //Data - long *cnt; -}; - -} -#endif +#ifndef HASH_H +#define HASH_H + +#include +#include +#include +#include + +namespace iret { + +class Hash : public FBase { +public: + Hash(void); + Hash(const char *nm); + Hash(int n,const char *nm); //n gets appended to type if >-1 + ~Hash(); + + void create_htable(List &Lst,int excess); //"str" for file of strings, + //"ad" for address file, "nm" numbers, + //"ha" hash array. Excess is # powers of 2 above size. + void create_htableM(List &Lst,int excess); //creates in memory ready for use + //and no need to call gopen or gclose functions + void create_htable(int mz,List &Lst,int excess); //"str" for file of strings, + //Creates a numbered version of above + + void gopen_htable_map(void); //Creates memory maps + void gopen_htable_map(int mz); //Creates memory maps + void gclose_htable_map(void); //Destroys memory maps + //and deletes memory + void gclose_htable_map(int mz); //Destroys memory maps + //and deletes memory + void gopen_htable_copy(Hash *pH); //Copies memory maps + + long find(const char *str); //Return number+1 if present, else 0. + //Number is not lexical order but hash order and then lexical + //within collesion groups. + + //Data + char *strmap; //Holds the bit map. + long *addr; //Holds the offsets to strmap. + long nwrds; //Number of words. + long tnum; //Truncation number, size of har. + long *harr; //Holds hash array. + long *farr; //Holds the hash coefficients. + long *px0; + long *px1; + long *px2; + long *px3; + long *px4; + long *px5; + long *px6; + long *px7; + long *px8; + long *px9; + long *px10; + long *px11; +}; + +class Chash : public Hash { +public: + Chash(void); + Chash(const char *nm); + Chash(int n,const char *nm); //n gets appended to type if >-1 + ~Chash(void); + + void create_ctable(Count &Ct,int excess); //Adds "ct" for counts + //Calls create_htable and then prodoces the array of counts. + void create_ctable(int mz,Count &Ct,int excess); //Adds "ct" for counts + //Creates a numbered version of above + void create_ctable(List &Lt,int excess); //Adds "ct" for term # + //and starts the count at 1 and in lexical order. count() will + //return 0 if term not in list. + void create_ctable(int mz,List &Lt,int excess); //Adds "ct" for term # + //Creates a numbered version of above + + void gopen_ctable_map(void); //Calls gopen_htable_map and also + //maps "ct" file. + void gopen_ctable_map(int mz); //Calls gopen_htable_map and also + //maps "ct" file. + void gclose_ctable_map(void); //Calls gclose_htable_map and also + //Unmaps "ct" file. + void gclose_ctable_map(int mz); //Calls gclose_htable_map and also + //Unmaps "ct" file. + + long count(const char *str); //Returns count if present, else 0. + + //Data + long *cnt; +}; + +} +#endif diff --git a/Library/MPtok.C b/Library/MPtok.C index c119909f54db1a8bc58c90eb84bac491836dd3b6..fd4ad085a94fcec881b809682a3905f9b89482fc 100644 --- a/Library/MPtok.C +++ b/Library/MPtok.C @@ -1,2036 +1,2036 @@ -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "MPtok.h" - -// These options are probably compile time constants - -static char option_tagsep = '_'; // The tagsep character -static char option_replacesep = '-'; // Replace tagsep with this - -static void chomp(char *line) -{ - int i; - - i = strlen(line) - 1; - while (i >= 0 && line[i] == '\n' || line[i] == '\r') - line[i--] = '\0'; -} - -// Data structure and algorithm for finding common pairs. - -// read a file of pairs into a data structure, -// the file must be sorted first - -void MPtok::init_pair(const string& file_name) -{ - filebuf fb; - fb.open(file_name.c_str(), ios::in); - istream is(&fb); - string pair; - - while (1) - { - getline(is, pair); - if (is.fail()) break; - if (pair.size() > 0) common_pair.insert(pair); - } - - fb.close(); -} - -// List of abbreviations in 3 categories -// ABB = can occur mid sentence -// EOS = can occur at end of sentence -// NUM = only used before numbers - -void MPtok::init_abbr(const string& file_name) -{ - filebuf fb; - fb.open(file_name.c_str(), ios::in); - istream is(&fb); - string typ, abb; - map val; - val["ABB"] = ABB_ABB; val["EOS"] = ABB_EOS; val["NUM"] = ABB_NUM; - - while (is.good()) - { - is >> typ; - if (val.count(typ)) - { - is >> abb; - if (abb.size() > 0) common_abbr[abb] = val[typ]; - } - } - fb.close(); -} - -static char nextchar(const char *t, int i) -{ - while (isspace(t[i])) i++; - return t[i]; -} - -// Look for a token at or prior to the text position - -static int lookbehind(const char *t, int i, const char *s, int *tokflag) -{ - int k = (int) strlen(s) - 1; - - while (i > 0 && isspace(t[i])) i--; - - while (k >= 0 && i >= 0) - { - if (k > 0 && tokflag[i]) break; - - if (tolower(s[k]) != tolower(t[i])) - return -1; - k--; - i--; - } - - return (k < 0 && tokflag[i+1]) ? i + 1 : -1; -} - -// Look for a token at or following the text position - -static int lookahead(const char *t, int i, const char *s, int *tokflag) -{ - int k = 0; - - while (isspace(t[i])) i++; - - while (k < strlen(s) && i < strlen(t)) - { - if (k > 0 && tokflag[i]) break; - - if (tolower(s[k]) != tolower(t[i])) - return -1; - k++; - i++; - } - - return (k == strlen(s) && tokflag[i]) ? i - (int) strlen(s) : -1; -} - -// Set the initial tokens at spaces - -void MPtok::tok_0() -{ - int i; - - tokflag[0] = 1; - for (i = 1; i < text_len; i++) - { - tokflag[i] = isspace(text[i]) || (i > 0 && isspace(text[i - 1])) ? 1 : 0; - } - tokflag[i] = 1; -} - -// Get quotes preceded by open parens -// -// A double quote, preceded by a space or open bracket is a separate token -// - -void MPtok::tok_1() -{ - for (int i = 1; i < text_len; i++) - { - if (text[i] == '"' && strchr("([{<", text[i-1])) - { - tokflag[i] = 1; - if (i + 1 < text_len) tokflag[i+1] = 1; - } - } -} - -// Look for ellipses -// -// Three dots in a row is a separate token - -void MPtok::tok_2() -{ - for (int i = 1; i + 2 < text_len; i++) - { - if (strncmp(&text[i], "...", 3) == 0) - { - tokflag[i] = 1; - if (i + 3 < text_len) tokflag[i+3] = 1; - } - } -} - -// Non-sentence-ending punctuation -// -// Certain punctuation characters are separate tokens - -void MPtok::tok_3() -{ - for (int i = 0; i < text_len; i++) - { - // If it is a comma and the next char is not a space and option_comma = 0 - - if (option_comma == 0 && text[i] == ',' && isspace(text[i + 1]) == 0) - { - // do nothing - } else if (strchr(",;:@#$%&", text[i])) - { - tokflag[i] = 1; - tokflag[i + 1] = 1; - } - } -} - -// Separate the slashes -// -// Slashes are a separate token -// except for +/-, +/+, -/-, -/+, and and/or. - -void MPtok::tok_5_6_7() -{ - for (int i = 0; i < text_len; i++) - { - if (text[i] == '/') - { - tokflag[i] = 1; - if (i+1 < text_len) tokflag[i+1] = 1; - - // Put back +/-, etc, unless option_hyphen is 1 - - if (i - 1 >= 0 - && i + 1 < text_len - && ((option_new < 9 - && text[i - 1] == '+' || (text[i - 1] == '-' && option_hyphen == 0) - && text[i + 1] == '+' || (text[i + 1] == '-' && option_hyphen == 0)) - || (option_new >= 9 - && (text[i - 1] == '+' || text[i - 1] == '-') - && (text[i + 1] == '+' || text[i + 1] == '-')))) - { - tokflag[i - 1] = 1; - tokflag[i] = tokflag[i+1] = 0; - tokflag[i + 2] = 1; - } - - // Put back and/or, etc - - if (option_new <= 7) - { - if (i > 5 && strncmp(text + i - 5, " and/or ", 8) == 0) - { - for (int j = 1; j < 5; j++) - tokflag[i - 2 + j] = 0; - } - } else - { - if (i > 4 && strncmp(text + i - 4, " and/or ", 8) == 0) - { - for (int j = 1; j < 6; j++) - tokflag[i - 3 + j] = 0; - } - } - } - } -} - -// All brackets -// -// Any open or closed bracket is a separate token -// -// Exclamation and question mark -// -// Any question or exclamation mark is a separate token - -void MPtok::tok_8_9() -{ - for (int i = 0; i < text_len; i++) - { - if (strchr("[](){}<>", text[i]) - || strchr("?!", text[i])) - { - tokflag[i] = 1; - if (i + 1 < text_len) tokflag[i+1] = 1; - } - } -} - -// Period at the end of a string may be followed by closed-bracket or quote -// -// A period that is preceded by a non-period -// and optionally followed by a close paren -// and any amount of space at the end of the string -// is a separate token. - -void MPtok::tok_10() -{ - for (int i = text_len - 1; i >= 0; i--) - { - if (isspace(text[i])) continue; - if (strchr("])}>\"'", text[i])) continue; - if (text[i] != '.') break; - if (text[i] == '.' && (i - 1 < 0 || text[i-1] != '.')) - { - tokflag[i] = 1; - if (i + 1 < text_len) tokflag[i+1] = 1; - } - } -} - -// Period followed by a capitalized word -// -// A period preceded by a character that is not another period and not a space -// and followed by a space then an upper case letter is a separate token - -void MPtok::tok_11() -{ - for (int i = 0; i < text_len; i++) - { - if (text[i] == '.' - && (i + 1 < text_len && isspace(text[i+1])) - && (i - 1 < 0 || text[i - 1] != '.' || isspace(text[i-1]) == 0) - && isupper(nextchar(text, i + 1))) - tokflag[i] = 1; - } -} - -// A normal word followed by a period -// -// A period followed by a space -// and preceded by 2 or more alphabetic characters or hyphens -// is a separate token - -void MPtok::tok_12() -{ - int wcnt = 0; - - for (int i = 0; i < text_len; i++) - { - if (text[i] == '.' - && tokflag[i + 1] - && wcnt >= 2) - tokflag[i] = 1; - - if (isalpha(text[i]) || text[i] == '-') - ++wcnt; - else - wcnt = 0; - } -} - -// A non-normal token (that has no lower case letters) followed by a period -// -// A period at the end of a token made of characters excluding lower case -// is a separate token - -void MPtok::tok_13() -{ - int stok = 0; - int wcnt = 0; - - for (int i = 0; i < text_len; i++) - { - if (text[i] == '.' - && tokflag[i + 1] - && wcnt >= 2) - tokflag[i] = 1; - - if (tokflag[i] == 1) stok = 1; - - if (islower(text[i]) || text[i] == '.') - { - stok = 0; - wcnt = 0; - } - - if (stok) - wcnt++; - } -} - -// put some periods with single-letter abbreviations -// -// A single alphabetic token followed by a period followed -// by a token that does not begin with an upper case letter -// or number is taken to be an abbreviation and the period -// does not start a new token. -// -// NOTE: This does not recognize initials in people's names, -// that problem is not simply solved. - -void MPtok::tok_14() -{ - for (int i = 0; i < text_len; i++) - { - if (text[i] == '.' - && i - 1 >= 0 && isalpha(text[i - 1]) && tokflag[i - 1] - && tokflag[i + 1] - && isupper(nextchar(text, i + 1)) == 0 - && isdigit(nextchar(text, i + 1)) == 0 - && nextchar(text, i + 1) != '(' - ) - { - tokflag[i] = 0; - } - } -} - -void MPtok::tok_15() -{ - int i, j, k, a; - char buff[MAX_ABB + 1]; - - for (i = 0; i < text_len; i++) - { - // only start at a current token - - if (! tokflag[i]) continue; - - // find alphabetic followed by period - - buff[0] = '\0'; - for (k = 0; i + k < text_len && k < MAX_ABB; k++) - { - buff[k] = text[i+k]; buff[k+1] = '\0'; - if (k > 0 && buff[k] == '.') break; // this is good - if (! isalpha(buff[k])) { buff[0] = '\0'; break; } // this is not good - } - - if (buff[0] == '\0' || i + k == text_len || k == MAX_ABB) continue; - - // at this point, buff[k] == '.' add 1 to make it the length - - k++; - - // if not found, try finding a concatenated abbrev - - if (! common_abbr.count(buff)) - { - for (; i + k < text_len && k < MAX_ABB; k++) - { - buff[k] = text[i+k]; buff[k+1] = '\0'; - if (k > 0 && buff[k] == '.') break; // this is good - if (! isalpha(buff[k])) { buff[0] = '\0'; break; } // this is not good - } - - if (buff[0] == '\0' || i + k == text_len || k == MAX_ABB) continue; - - // at this point, buff[k] == '.' add 1 to make it the length - - k++; - } - - // if not found, give up - - if (! common_abbr.count(buff)) continue; - - if (common_abbr[buff] == ABB_NUM) - { - for (j = i + k; j < text_len && isspace(text[j]); j++) ; // next must be a number - if (! isdigit(text[j])) continue; // go to next abbreviation - } else if (common_abbr[buff] == ABB_EOS) - { - for (j = i + k; j < text_len && isspace(text[j]); j++) ; // if next token is upper case letter - if (isupper(text[j])) tokflag[i + (--k)] = 1; // tokenize the final period of this abbreviation - } - - // clear all token flags - - for (j = 1; j < k; j++) tokflag[i + j] = 0; - } -} - -// Check for common pairs that should not be considered sentence breaks - -void MPtok::tok_15_1() -{ - int i, j, k, tnum, p; - char buff[MAX_ABB + 1]; - - for (i = 0; i < text_len; i++) - { - if (! tokflag[i]) continue; - - // must be alphanumeric token followed by period token followed by space followed by alphanumeric token - - tnum = 0; - buff[0] = '\0'; - for (p = k = 0; i + k < text_len && k < MAX_ABB; k++) - { - buff[k] = text[i+k]; buff[k+1] = '\0'; - - if (isspace(buff[k])) - { - if (tnum == 2) break; // this is good - else if (tnum == 1) continue; // ok - else { buff[0] = '\0'; break; } // this shouldn't happen - } - - if (tokflag[i+k]) - { - if (tnum > 2) break; // done - else tnum++; - } - - if (tnum == 1 && buff[k] == '.') p = k; - if (tnum == 1 && buff[k] != '.') { buff[0] = '\0'; break; } // nope - if (! isalnum(buff[k])) { buff[0] = '\0'; break; } // nope - } - - if (buff[0] == '\0' || i + k == text_len || k == MAX_ABB) continue; - - // at this point buff is a potential pair, so untokenize the period, that's all - - if (common_pair.count(buff)) - tokflag[p] = 0; - } -} - -// Get cases where a space after a sentence has been omitted -// -// A period that occurs in a token consisting of alphabetic -// letters with a vowel to the left and the right is a -// separate token. - -void MPtok::tok_16() -{ - int j; - int has_vowel; - - for (int i = 0; i < text_len; i++) - { - if (text[i] == '.' && tokflag[i] == 0) - { - has_vowel = 0; - for (j = i - 1; j >= 0; --j) - { - if (isalpha(text[j]) == 0) - break; - if (strchr("aeiouAEIOU", text[j])) - has_vowel = 1; - if (tokflag[j]) - break; - } - if ((j >= 0 && tokflag[j] == 0) || has_vowel == 0) - continue; - - j = i + 1; - - has_vowel = 0; - for (; j < text_len && tokflag[j] == 0; ++j) - { - if (isalpha(text[j]) == 0) - break; - if (strchr("aeiouAEIOU", text[j])) - has_vowel = 1; - } - - if ((j < text_len && tokflag[j] == 0) || has_vowel == 0) - continue; - - tokflag[i] = 1; - tokflag[i + 1] = 1; - } - } -} - -// Correction to tok_16, -// Don't count if the token before is a single letter -// or the token following is a single letter other than 'a'. -// Also, don't count if the token to the right is gov, com, edu, etc. -// because those are web addresses! - -#define COMPLEX_WINDOW 40 - -enum {COMPLEX_NOT = 0, COMPLEX_YES, COMPLEX_DONE}; - -struct _complex { - int flag; - int offset; - const char *str; - int len; -} complex[] = { - COMPLEX_YES, 0, "complex", 7, - COMPLEX_NOT, 0, "complexi", 8, - COMPLEX_NOT, 0, "complexed", 9, - COMPLEX_NOT, 0, "complexa", 8, - COMPLEX_NOT, 0, "complex-", 8, - COMPLEX_NOT, 0, "complexl", 8, - COMPLEX_NOT, 0, "complexu", 8, - COMPLEX_NOT, -1, "-complex", 7, - COMPLEX_NOT, -2, "nocomplex", 9, - COMPLEX_NOT, -3, "subcomplex", 10, - COMPLEX_YES, 0, "hybrid", 6, - COMPLEX_NOT, 0, "hybridi", 7, - COMPLEX_NOT, 0, "hybrido", 7, - COMPLEX_NOT, 0, "hybrida", 7, - COMPLEX_NOT, 0, "hybrid-", 7, - COMPLEX_NOT, -1, "-hybrid", 7, - COMPLEX_YES, 0, "duplex", 6, - COMPLEX_NOT, -1, "oduplex", 7, - COMPLEX_DONE, 0, NULL, 0, -}; - -int MPtok::complex_check() -{ - int last_period = -2*COMPLEX_WINDOW; - int last_complex = -2*COMPLEX_WINDOW; - int i, j; - int complex_match; - - for (i = 0; i < text_len; i++) - { - if (text[i] == '.') - { - if (i - last_complex <= COMPLEX_WINDOW) - return 1; - last_period = i; - } - - complex_match = 0; - for (j = 0; complex[j].str; j++) - { - if (complex[j].flag == COMPLEX_NOT) - { - if (i + complex[j].offset >= 0 - && strncmp(text+i+complex[j].offset, complex[j].str, complex[j].len) == 0) - { - // don't match here - complex_match = 0; - } - } else if (complex[j].flag == COMPLEX_YES) - { - if (i + complex[j].offset >= 0 - && strncmp(text+i+complex[j].offset, complex[j].str, complex[j].len) == 0) - { - // match here - complex_match = 1; - } - } - } - - if (complex_match) - { - if (i - last_period <= COMPLEX_WINDOW) - return 1; - last_complex = i; - } - } - return 0; -} - -void MPtok::tok_16_1() -{ - int i, j; - char v1, v2; - int c1, c2; - - if (option_new == 3 && strstr(text, "complex")) - return; - - if (option_new >= 4 && complex_check()) - return; - - for (i = 0; i < text_len; i++) - { - if (text[i] == '.' && tokflag[i] == 0) - { - char suffix[10]; - int s_i; - - v1 = '\0'; - c1 = 0; - for (j = i - 1; j >= 0; --j) - { - if (isalpha(text[j]) == 0) - break; - if (strchr("aeiouAEIOU", text[j])) - v1 = tolower(text[j]); - c1++; - if (tokflag[j]) - break; - } - if ((j >= 0 && tokflag[j] == 0) - || v1 == '\0' - || c1 == 1) - continue; - - j = i + 1; - - v2 = '\0'; - c2 = 0; - s_i = 0; - for (; j < text_len && tokflag[j] == 0; ++j) - { - if (isalpha(text[j]) == 0) - break; - if (strchr("aeiouAEIOU", text[j])) - v2 = tolower(text[j]); - if (s_i < 3) - suffix[s_i++] = tolower(text[j]); suffix[s_i] = '\0'; - c2++; - } - - if ((j < text_len && tokflag[j] == 0) - || v2 == '\0' - || (c2 == 1 && v2 != 'a') - || (c2 == 3 && tokflag[j] == 1 && s_i == 3 - && (strcmp(suffix, "gov") == 0 - || strcmp(suffix, "edu") == 0 - || strcmp(suffix, "org") == 0 - || strcmp(suffix, "com") == 0))) - continue; - - tokflag[i] = 1; - tokflag[i + 1] = 1; - } - } -} - - -// Numeric endings of sentences -// -// A period after a numeric token followed by a token that starts -// with an alphabetic character, is a separate token. -// -// This should be covered already by tok_13 - -void MPtok::tok_17() -{ - int j; - - for (int i = 0; i < text_len; i++) - { - if (text[i] == '.' - && tokflag[i] == 0 - && tokflag[i + 1]) - { - for (j = i - 1; j >= 0 && isdigit(text[j]) && tokflag[j] == 0; --j) - ; - if (j >= 0 && j < i - 1 && tokflag[j] && isalpha(nextchar(text, i + 1))) - tokflag[i] = 1; - } - } -} - -// period at end of string is a token - -void MPtok::tok_20() -{ - for (int i = text_len - 1; i >= 0; --i) - { - if (isspace(text[i])) - continue; - - if (strchr(".!?", text[i])) - tokflag[i] = 1; - - break; - } -} - -// a period that follows a non-common word, and that is -// followed by a lower case common word is probably not a token - -void MPtok::tok_20_1() -{ - int j; - - for (int i = 0; i < text_len; ++i) - { - if (text[i] == '.' && tokflag[i] == 1) - { - int tcnt, lcnt, ocnt; - tcnt = lcnt = ocnt = 0; - - // make sure the previous word was *not* common - - for (j = i - 1; j >= 0; j--) - { - if (isspace(text[j])) continue; - if (option_new >= 2) - { - if (islower(text[j]) == 0 && text[j] != '-') ocnt++; - } else - { - if (! islower(text[j])) ocnt++; - } - - if (tokflag[j] || j == 0) - { - if (ocnt == 0) - { - goto nexti; - } - break; - } - } - - tcnt = lcnt = ocnt = 0; - - // make sure the next word is common - - for (j = i + 1; j < text_len; j++) - { - if (isspace(text[j])) continue; - if (tokflag[j]) tcnt++; - - if (tcnt == 2 || j == text_len - 1) - { - if (lcnt > 0 && ocnt == 0) tokflag[i] = 0; - break; - } - - if (islower(text[j])) lcnt++; - else ocnt++; - } - } -nexti: ; - } -} - -// tokenized period followed by non-space other than close paren -// is not a token - -void MPtok::tok_20_2() -{ - int j; - - for (int i = 0; i < text_len - 1; ++i) - { - if (text[i] == '.' && tokflag[i] == 1 - && strchr(" ()[]\"\'\n\t\r", text[i+1]) == 0) - { - tokflag[i] = 0; - } - } -} - - -// long dash -// -// A pair of hyphens is a complete token - -void MPtok::tok_21() -{ - for (int i = 0; i + 1 < text_len; i++) - { - if (strncmp(&text[i], "--", 2) == 0) - { - tokflag[i] = 1; - if (i + 2 < text_len) - { - i += 2; - tokflag[i] = 1; - } - } - } -} - -// hyphens -// -// If specified as an option, a hyphen between letters is a complete token - -void MPtok::tok_21a() -{ - if (option_hyphen == 0) return; - - for (int i = 0; i + 1 < text_len; i++) - { - if (text[i] == '-' - && (i == 0 || text[i-1] != '-') - && text[i+1] != '-') - { - tokflag[i] = 1; - tokflag[i+1] = 1; - } - } -} - - -// quote -// -// Any double quote is a separate token - -void MPtok::tok_22() -{ - for (int i = 0; i < text_len; i++) - { - if (text[i] == '"') - { - tokflag[i] = 1; - if (i + 1 < text_len) - { - i += 1; - tokflag[i] = 1; - } - } - } -} - -// possessive -// -// Any single quote at the end of a token that is not -// preceded by a single quote is a separate token - -void MPtok::tok_23() -{ - for (int i = 0; i < text_len; i++) - { - if (text[i] == '\'' - && (i - 1 >= 0 && text[i - 1] != '\'') - && tokflag[i + 1]) - { - tokflag[i] = 1; - } - } -} - - -// quote -// -// If a single quote starts a token, or is preceded by a -// single quote, and followed by a character -// that is not a single quote, then -// the character to it's right is the start of a new token - -void MPtok::tok_24() -{ - for (int i = 0; i < text_len; i++) - { - if (text[i] == '\'' - && (tokflag[i] == 1 || (i - 1 >= 0 && text[i - 1] == '\'')) - && (i + 1 < text_len && text[i + 1] != '\'')) - { - tokflag[i + 1] = 1; - } - } -} - -// put back possessive -// -// A single quote that is a whole token followed by a lower case s -// that is also a whole token (without space between them) -// should be merged into a single token - -void MPtok::tok_25() -{ - for (int i = 0; i < text_len; i++) - { - if (text[i] == '\'' - && tokflag[i] == 1 - && i + 1 < text_len && text[i + 1] == 's' - && tokflag[i+1] == 1 - && (i + 2 >= text_len || isspace(text[i + 2]) || tokflag[i + 2] == 1)) - { - tokflag[i + 1] = 0; - } - } -} - -// quote -// -// A pair of single quotes is a separate token - -void MPtok::tok_26() -{ - for (int i = 0; i < text_len; i++) - { - if (strncmp(&text[i], "''", 2) == 0 - || strncmp(&text[i], "``", 2) == 0) - { - tokflag[i] = 1; - if (i + 2 < text_len) tokflag[i + 2] = 1; - } - } -} - -// possessive -// -// A single quote followed by a letter s is a possessive - -void MPtok::tok_27() -{ - for (int i = 0; i < text_len; i++) - { - if (text[i] == '\'' - && i + 1 < text_len - && tolower(text[i + 1]) == 's' - && (i + 2 >= text_len || tokflag[i + 2])) - { - tokflag[i] = 1; - } - } -} - -// split "cannot" to "can not" -// -// A single token that is the word cannot (in any case) -// is split into two words - -void MPtok::tok_28() -{ - for (int i = 0; i < text_len; i++) - { - if ((strncmp(&text[i], "cannot", 6) == 0 - || strncmp(&text[i], "Cannot", 6) == 0) - && tokflag[i + 6]) - { - tokflag[i + 3] = 1; - } - } -} - -// put list item elements back at sentence end -// -// A period that is preceded by an alphanumeric (no space) -// and any amount of preceding space and an end-mark -// stays with the alphanumeric. - -void MPtok::tok_29() -{ - int j; - - for (int i = 0; i < text_len; i++) - { - if (text[i] == '.' - && tokflag[i] && tokflag[i + 1] - && i - 1 >= 0 && isalnum(text[i - 1]) - && tokflag[i - 1] - && ((j = lookbehind(text, i-2, ".", tokflag)) >= 0 - || (j = lookbehind(text, i-2, "?", tokflag)) >= 0 - || (j = lookbehind(text, i-2, "!", tokflag)) >= 0) - && tokflag[j]) - { - tokflag[i] = 0; - } - } -} - -// attach list elements to the beginnings of their sentences -// this means, attach the period to the list element -// -// a list element is a single letter or a one or two digits -// which is preceded by an end of sentence ".!?;" -// or colon (provided it doesn't belong to a proportion construct) - -void MPtok::tok_29a() -{ - int i, j; - - for (i = 0; i < text_len; i++) - { - if (text[i] == '.' && tokflag[i]) - { - // Look back, make sure the token before the period - // is either single alphanumeric, or at most a two digit number - // and the character before that is a punctuation ".?!:," - - int tcnt, acnt, dcnt, pcnt, ocnt, scnt; - tcnt = acnt = dcnt = pcnt = ocnt = scnt = 0; - char p; - - for (j = i - 1; j >= 0; j--) - { - if (isspace(text[j])) { scnt++; continue; } - else if (tcnt == 0 && isalpha(text[j])) ++acnt; - else if (tcnt == 0 && isdigit(text[j])) ++dcnt; - else if (tcnt == 1 && strchr(".!?:;,", text[j])) { pcnt++; p = text[j]; } - else ocnt++; - - if (tokflag[j] || j == 0) - { - tcnt++; - if (tcnt == 1 && ocnt == 0 && scnt == 0 - && ((acnt == 1 && dcnt == 0) || (acnt == 0 && dcnt > 0 && dcnt <= 2))) - { - // This is acceptable - } else if (tcnt == 2 && pcnt <= 1 && ocnt == 0 && scnt > 0) - { - if (p == ':') - { - while (--j >= 0 && isspace(text[j])) - ; - if (j >= 0 && isdigit(text[j])) - { - // It's probably a proportion - break; - } - } - // Jackpot - tokflag[i] = 0; - } else - { - // This is not - break; - } - scnt = 0; - } - } - } - } -} - -// list elements at the beginning of a string -// -// An alphanumeric token followed by a period -// at the beginning of the line stays with the -// alphanumeric - -void MPtok::tok_30() -{ - int i = 0; - - while (isspace(text[i])) i++; - - if (isalnum(text[i]) - && tokflag[i] - && i + 1 < text_len - && text[i + 1] == '.' - && tokflag[i + 1]) - { - tokflag[i + 1] = 0; - } -} - -// process American style numbers - -void MPtok::tok_31() -{ - int j; - - for (int i = 0; i < text_len; i++) - { - if (text[i] == ',' - && i + 3 < text_len - && tokflag[i] && tokflag[i + 1] - && isdigit(text[i + 1]) - && isdigit(text[i + 2]) - && isdigit(text[i + 3]) - && i - 1 >= 0 && isdigit(text[i - 1]) - ) - { - tokflag[i] = 0; - tokflag[i + 1] = 0; - } - } -} - -// process British style numbers - -void MPtok::tok_32() -{ - int j; - - for (int i = 0; i < text_len; i++) - { - if (text[i] == ' ' - && i + 3 < text_len - && tokflag[i] && tokflag[i + 1] - && isdigit(text[i + 1]) - && isdigit(text[i + 2]) - && isdigit(text[i + 3]) - && i - 1 >= 0 && isdigit(text[i - 1]) - ) - { - tokflag[i] = 0; - tokflag[i + 1] = 0; - } - } -} - -// tokenize unicode escapes -// -// Added - -void MPtok::tok_33() -{ - int j; - - for (int i = 0; i < text_len; i++) - { - if (text[i] == '&') - { - if (text[i + 1] == '#') - { - for (j = i + 2; isdigit(text[j]); j++) - ; - } else - { - for (j = i + 1; isalpha(text[j]); j++) - ; - } - - if (text[j] == ';') - { - // Tokenize the escape, untokenize everything inside - - tokflag[i] = 1; - for (i++; i <= j; i++) tokflag[i] = 0; - tokflag[i] = 1; - } - } - } -} - -// Remove tags if they are present - -void MPtok::tok_un() -{ - int untok = 0; - for (int i = 0; text[i]; ++i) - { - if (isspace(text[i])) untok = 0; - if (text[i] == option_tagsep) untok = 1; - if (untok) text[i] = ' '; - } -} - - -void MPtok::set_tokflag() -{ - int i; - - tok_0(); - tok_1(); - tok_2(); - tok_3(); - - // step 4 replaces tag char, this is done at output - - tok_5_6_7(); - tok_8_9(); - - tok_10(); - tok_11(); - if (option_new >= 1) - { - tok_21(); - tok_21a(); - tok_22(); - tok_23(); - tok_24(); - tok_25(); - tok_26(); - tok_27(); - } - tok_12(); - tok_13(); - tok_14(); - if (option_new <= 5) - tok_15(); - if (option_new < 2) - tok_16(); - tok_17(); - - // steps 18 and 19 recognize periods within parens, - // and this is moved to the segmentation section - - tok_20(); - if (option_new >= 1) - { - tok_20_1(); - tok_20_2(); - if (option_new >= 2) - tok_16_1(); - if (option_new >= 6) - tok_15(); - if (option_new >= 7) - tok_15_1(); - } - if (option_new < 1) - { - tok_21(); - tok_21a(); - tok_22(); - tok_23(); - tok_24(); - tok_25(); - tok_26(); - tok_27(); - } - tok_28(); - if (option_new >= 1) - tok_29a(); - else - tok_29(); - tok_30(); - tok_31(); - tok_32(); - - tok_33(); -} - -/* set_endflag -** -** After tokflag has been set, find the possible sentence endings. -*/ - -void MPtok::set_endflag() -{ - int i; - - // The following tests look for end-stops and label them. - // They include steps 18 and 19 - - for (i = 0; i <= text_len; i++) - endflag[i] = 0; - - // Count the number of unmatched parens - - int up = 0; // unmatched round parens - int ub = 0; // unmatched brackets - - for (i = 0; i < text_len; i++) - { - if (text[i] == '(') ++up; - if (text[i] == ')') --up; - if (text[i] == '[') ++ub; - if (text[i] == ']') --ub; - if (up < 0) up = 0; - if (ub < 0) ub = 0; - } - - // Now find the end-of-sentence marks - - // tok_18: periods within parentheses, allow for nesting - // tok_19: periods within brackets, allow for nesting - // the perl version solves this by putting the period - // back with the previous token, but a better solution - // is to allow it to be tokenized but just don't - // allow it to be an end-of-sentence. - // Therefore, these are moved to the segmentation - // section - - int p = 0; // round parens - int b = 0; // brackets - - for (i = 0; i < text_len; i++) - { - if (text[i] == '(') ++p; - if (text[i] == ')') --p; - if (text[i] == '[') ++b; - if (text[i] == ']') --b; - if (p < 0) p = 0; - if (b < 0) b = 0; - - if (strchr(".!?", text[i]) - && tokflag[i] - && tokflag[i + 1]) - { - if (option_segment && p <= up && b <= ub) - endflag[i] = 1; - - // This is optional to join periods with - // probable abbreviations - - if (p > up || b > ub) - tokflag[i] = 0; - } - } - - // endtokens followed by a single or double quote, which matches - // a single or double quote in the previous sentence - - if (option_new >= 1) - { - int dquo, squo; - dquo = squo = 0; - - for (i = 0; i < text_len; i++) - { - if (text[i] == '"') dquo = ! dquo; - else if (text[i] == '\'') squo = ! squo; - else if (endflag[i]) - { - if ((text[i+1] == '"' && dquo) || (text[i+1] == '\'' && squo)) - { - endflag[i] = 0; - - // But don't end at all if the next token is something - // other than an upper case letter. - - if (option_new >= 2) - { - int j; - int ok = 0; - - for (j = i + 2; j < text_len; j++) - { - if (isspace(text[j])) continue; - // if (isupper(text[j])) - if (isupper(text[j]) || text[j] == '(') - { - ok = 1; - break; - } - if (tokflag[j]) break; - } - - if (ok) - endflag[i+1] = 1; - } else - { - endflag[i+1] = 1; - } - } - dquo = squo = 0; - } - } - } -} - - -/* set_endflag_01 -** -** After tokflag has been set, find the possible sentence endings. -** This has improved paren matching. -*/ - -#define MAX_MATCH 500 // Maximum length to get a paren match - -void MPtok::set_endflag_01() -{ - int match[text_len]; - int i, j; - - // The following tests look for end-stops and label them. - // They include steps 18 and 19 - - for (i = 0; i <= text_len; i++) - endflag[i] = 0; - - for (i = 0; i < text_len; i++) - match[i] = 0; - - for (i = text_len - 1; i >= 0; i--) - { - if (text[i] == '(' || text[i] == '[') - { - for (j = i + 1; text[j] && j - i <= MAX_MATCH; j++) - { - // Skip parens that are already matched - - if (match[j] > j) - { - j = match[j]; - continue; - } - - // Look for a matching close paren - - if (match[j] == 0 - && ((text[i] == '(' && text[j] == ')') - || (text[i] == '[' && text[j] == ']'))) - { - match[i] = j; - match[j] = i; - break; - } - } - } - } - - int next_match = 0; - for (i = 0; i < text_len; i++) - { - if (match[i] > next_match) - next_match = match[i]; - - if (strchr(".!?", text[i]) - && tokflag[i] - && tokflag[i + 1] - && (option_new <= 4 || option_doteos == 1 || (i > 0 && isspace(text[i-1]) == 0))) - { - if (i <= next_match) - tokflag[i] = 0; - else if (option_segment) - endflag[i] = 1; - } - } - - // endtokens followed by a single or double quote, which matches - // a single or double quote in the previous sentence - - int dquo, squo; - dquo = squo = 0; - - for (i = 0; i < text_len; i++) - { - if (option_new <= 7 && text[i] == '"') dquo = ! dquo; - else if (option_new >= 8 && text[i] == '"' && tokflag[i] && tokflag[i+1]) dquo = ! dquo; - else if (option_new <= 7 && text[i] == '\'') squo = ! squo; - else if (option_new >= 8 && text[i] == '\'' - && tokflag[i] && (tokflag[i+1] || (text[i+1] == '\'' && tokflag[i+2]))) squo = ! squo; - else if (endflag[i]) - { - if ((text[i+1] == '"' && dquo) || (text[i+1] == '\'' && squo)) - { - endflag[i] = 0; - - // But don't end at all if the next token is something - // other than an upper case letter. - - if (option_new >= 2) - { - int j; - int ok = 0; - - for (j = i + 2; j < text_len; j++) - { - if (isspace(text[j])) continue; - // if (isupper(text[j])) - if (isupper(text[j]) || text[j] == '(') - { - ok = 1; - break; - } - if (tokflag[j]) break; - } - - if (ok) - endflag[i+1] = 1; - } else - { - endflag[i+1] = 1; - } - } - dquo = squo = 0; - } - } -} - - -// Size buffer: return the size of the buffer required to hold all of the tokenized text. -// It can be simply estimated by a formula that depends only on the length of text and number of tokens. - -int MPtok::size_buff() -{ - int size = 1; // Start with null terminator - int t = option_pretag.size(); // for each tag, the length of the UNTAG string - - if (t <= 0) t = 1; // Make sure there is at least one - t += 2; // Add one for underscore and one for space - - for (int i = 0; i < text_len; i++) - { - size++; // Count all characters - if (tokflag[i]) size += t; // Count token delimiters (may overcount) - if (endflag[i]) size++; // Add one for newline - } - return size; -} - - -/* append_token -** -** Save a single token to a buffer. -*/ - -void MPtok::append_token(string& buff, int& sp, char *tok, int ef) -{ - // Convert tag separator chars and back quotes (?) - - for (int i = 0; tok[i]; i++) - { - if (tok[i] == option_tagsep) tok[i] = option_replacesep; - if (tok[i] == '`') tok[i] = '\''; - } - - // Skip whitespace if tokens are being output - // Otherwise, skip whitespace at the start of a sentence - - if (option_token || ! sp) while (isspace(*tok)) ++tok; - - // Save the token - - if (strlen(tok) > 0) - { - // Add delimiter if needed - - if (option_token && sp) buff += ' '; - - // Append token to output - - if (option_new < 9) - { - while (*tok && (! option_token || ! isspace(*tok))) - buff += *(tok++); - } else - { - while (*tok) - buff += *(tok++); - } - - sp = 1; - - // Add tag holders - - if (option_token && option_pretag.size() > 0) - { - buff += option_tagsep; - buff += option_pretag; - } - - // If it was end of sentence, then add newline - - if (ef) - { - buff += '\n'; - sp = 0; - } - } -} - -// Strip whitespace after sentences - -static void adjust_space(string& buff) -{ - while (buff.size() > 0 && isspace(buff[0])) buff.erase(0, 1); - - // delete two spaces in a row, but keep newlines - - for (int i = 1; i < buff.size(); i++) - { - if (isspace(buff[i]) && isspace(buff[i-1])) - buff.erase((buff[i] == '\n')?(--i):(i--), 1); - } - - for (int i = buff.size() - 1; i >= 0 && isspace(buff[i]); i--) - buff.erase(i, 1); -} - -/* token_string -** -** After the tokflag and endflag have been set, copy the tokens to the buffer. -*/ - -string MPtok::token_string() -{ - string buff; - - int i; - - // Move token starts to non-whitespace chars - - int last_tok = 0; - for (i = 0; i < text_len; i++) - { - if (tokflag[i] == 1 && isspace(text[i])) - { - tokflag[i] = 0; - last_tok = 1; - } else if (isspace(text[i]) == 0 && last_tok) - { - tokflag[i] = 1; - last_tok = 0; - } - } - - // Extract the tokens and print them out now - - char *tok = new char[text_len + 1]; - int pos = 0; - int sp = 0; - int ef = 0; - - tok[pos] = '\0'; - - for (i = 0; i <= text_len; i++) - { - // The start of a new token - - if (tokflag[i]) - { - // Print the current token - - append_token(buff, sp, tok, ef); - - // Start a new token - - pos = 0; - tok[pos] = '\0'; - - ef = 0; - } - - // Append to the current token - - tok[pos++] = text[i]; - tok[pos] = '\0'; - - // If any of the characters in the token are endflagged, - // Then pass this information along for end-of-sentence - - if (endflag[i]) ef = 1; - } - - // Print the last token - - append_token(buff, sp, tok, ef); - - delete[] tok; - - // Adjust the end of sentence boundaries - - adjust_space(buff); - - return buff; -} - -void MPtok::map_escapes() -{ - char *s; - int j, k, ch; - char buff[10]; - - k = 0; - for (int i = 0; text[i]; i++) - { - if (text[i] == '&' && text[i + 1] == '#') - { - for (s = &buff[0], j = 2; j <= 4 && i + j < text_len && isdigit(text[i + j]); j++) - *s++ = text[i + j]; - *s = '\0'; - ch = atoi(buff); - if (strlen(buff) > 0 && text[i + j] == ';' && ch > 0 && ch <= 256) - { - text[k] = ch; - if (! text[k]) text[k] = ' '; - k++; - i = i + j; - continue; - } - } - text[k++] = text[i]; - } - text[k] = '\0'; - text_len = k; -} - -MPtok::MPtok(string idir, const string& cnam) -{ - tok_initialized = 0; - - if (idir.size() == 0) - { - char *p = getenv("MEDPOST_HOME"); - if (p && strlen(p)) - { - idir = p; - - int found = idir.find("="); - if (found != string::npos) - idir = idir.substr(found + 1); - } - } - - - if (idir.size() == 0) - { - char buff[1000]; - FILE *fp = fopen("path_medpost", "r"); - if (fp) - { - if (fgets(buff, 1000, fp)) - { - chomp(buff); - idir = &buff[0]; - } - fclose(fp); - } - } - - if (idir.size() == 0) - idir = "/home/natxie/CPP64/lib/FIXED_DATA/"; - - option_dir = idir; - - option_token = 1; - option_segment = 1; - option_hyphen = 0; - option_comma = 1; - option_pretok = 0; - option_new = MPTOK_VERSION; - option_doteos = 0; - - if (cnam.size() > 0) - { - option_cnam = "_"; - option_cnam += cnam; - } - - init(); -} - -void MPtok::init(void) -{ - if (tok_initialized) return; - - string fname; - - fname = option_dir + "/medpost" + option_cnam + ".pairs"; - init_pair(fname); - - fname = option_dir + "/medpost" + option_cnam + ".abbr"; - init_abbr(fname); - - tok_initialized = 1; -} - -MPtok::~MPtok() -{ -} - -// Global tokenizer - -string MPtok::tokenize(const string& txt, int mt) -{ - if (option_pretok) return save_string(txt); - - option_token = mt; - text_len = txt.size(); - if (text_len == 0) return string(""); - - text = new char[text_len + 1]; - strcpy(text, txt.c_str()); - - map_escapes(); - - if (text_len == 0) return NULL; - - tokflag = new int[text_len + 1]; - endflag = new int[text_len + 1]; - - set_tokflag(); - if (option_new < 3) - set_endflag(); - else - set_endflag_01(); - - string buff = token_string(); - save_string(buff); - - delete[] text; text = NULL; - delete[] tokflag; tokflag = NULL; - delete[] endflag; endflag = NULL; - - return buff; -} - -string MPtok::tokenize(const string& text) -{ - return tokenize(text, 1); -} - -string MPtok::segment(const string& text) -{ - sent.clear(); - - // tokenize the text - - int save_option_segment = option_segment; - option_segment = 1; - string buff = tokenize(text, 0); - option_segment = save_option_segment; - - if (buff.size() == 0) return text; - - int found = 0; - int pos = 0; - - while (pos < buff.size()) - { - found = buff.find('\n', pos); - if (found == string::npos) - { - sent.push_back(buff.substr(pos)); - pos = buff.size(); - } else - { - sent.push_back(buff.substr(pos, found - pos)); - pos = found + 1; - } - } - - return buff; -} - -string MPtok::save_string(const string& s) -{ - stringstream ss (stringstream::in | stringstream::out); - string w, t; - int found; - string ret; - - word.clear(); - tag.clear(); - - ss << s; - while (ss.good()) - { - ss >> w; - if (w.size() == 0) break; - - found = w.find('_'); - - if (found != string::npos) - { - t = w.substr(found + 1); - w.resize(found); - word.push_back(w); - tag.push_back(t); - } else - { - word.push_back(w); - tag.push_back(option_pretag); - - } - if (ret.size() > 0) ret += " "; - ret += w; - } - - // now look for continuation tags... - - for (int i = 0; i < word.size(); i++) - { - int j = tag[i].size() - 1; - if (j >= 0 && tag[i][j] == '+' && i < tag.size() - 1) - { - word[i] = word[i] + " " + word[i + 1]; - tag[i] = tag[i + 1]; - word.erase(word.begin() + i + 1, word.begin() + i + 2); - tag.erase(tag.begin() + i + 1, tag.begin() + i + 2); - i--; - } - } - - return ret; -} - - -static int count_words(const char *s) -{ - int i; - - i = 1; - for (; *s; ++s) - { - if (*s == ' ') ++i; - } - return i; -} - -static void print_word(const char *s, int i) -{ - for (; i > 0 && *s; ++s) { if (*s == ' ') --i; } - while (*s && *s != ' ') { printf("%c", *s); ++s; } -} - -void MPtok::print(int how) -{ - int i, j, w; - - if (how != 0 && how != 2) - { - printf("print(%d) not defined\n", how); - return; - } - - for (i = 0; i < word.size(); ++i) - { - // Get the words from an idiom - - for (w = 0; w < count_words(word[i].c_str()); ++w) - { - if (how == 2 && i + w > 0) printf(" "); - - print_word(word[i].c_str(), w); - - if (how == 0) - { - printf(" tagged %s", tag[i].c_str()); - if (w < count_words(word[i].c_str()) - 1) printf("+"); - printf("\n"); - } else if (how == 2) - { - printf("%s%s", "_", tag[i].c_str()); - if (w < count_words(word[i].c_str()) - 1) printf("+"); - } - } - } - if (how == 2) - printf("\n"); -} - -void MPtok::merge_words(int s, int n) -{ - string tmp = word[s]; - - for (int i = s + 1; i < s + n; i++) - { - tmp += " "; - tmp += word[i]; - } - - // printf("merging words : '%s' n = %d\n", tmp.c_str(), n); - - for (int k = s; k + n < word.size(); k++) - { - word[k+1] = word[k+n]; - tag[k+1] = tag[k+n]; - } - - // Fixup the remaining array - - word.resize(word.size() - n + 1); - tag.resize(word.size()); - - word[s] = tmp; -} - -void MPtok::split_words() -{ - for (int i = 0; i < word.size(); i++) - { - int found = word[i].find(' '); - - if (found != string::npos) - { - string tmp1(word[i], 0, found); - string tmp2(word[i], found + 1, string::npos); - - // Move all the words and tags down - - word.resize(word.size() + 1); - tag.resize(tag.size() + 1); - - for (int j = word.size() - 1; j > i; j--) - { - word[j] = word[j - 1]; - tag[j] = tag[j - 1]; - } - - word[i] = tmp1; - tag[i] = tag[i+1]; - tag[i] += "+"; - - word[i+1] = tmp2; - } - } -} - -// Callable functions to set internal options - -void MPtok::set_segment(int i) { option_segment = i; } -void MPtok::set_hyphen(int i) { option_hyphen = i; } -void MPtok::set_comma(int i) { option_comma = i; } -void MPtok::set_pretag(char *a) { option_pretag = a; } -void MPtok::set_pretok(int i) { option_pretok = i; } -void MPtok::set_new(int i) { option_new = i; } -void MPtok::set_doteos(int i) { option_doteos = i; } +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "MPtok.h" + +// These options are probably compile time constants + +static char option_tagsep = '_'; // The tagsep character +static char option_replacesep = '-'; // Replace tagsep with this + +static void chomp(char *line) +{ + int i; + + i = strlen(line) - 1; + while (i >= 0 && line[i] == '\n' || line[i] == '\r') + line[i--] = '\0'; +} + +// Data structure and algorithm for finding common pairs. + +// read a file of pairs into a data structure, +// the file must be sorted first + +void MPtok::init_pair(const string& file_name) +{ + filebuf fb; + fb.open(file_name.c_str(), ios::in); + istream is(&fb); + string pair; + + while (1) + { + getline(is, pair); + if (is.fail()) break; + if (pair.size() > 0) common_pair.insert(pair); + } + + fb.close(); +} + +// List of abbreviations in 3 categories +// ABB = can occur mid sentence +// EOS = can occur at end of sentence +// NUM = only used before numbers + +void MPtok::init_abbr(const string& file_name) +{ + filebuf fb; + fb.open(file_name.c_str(), ios::in); + istream is(&fb); + string typ, abb; + map val; + val["ABB"] = ABB_ABB; val["EOS"] = ABB_EOS; val["NUM"] = ABB_NUM; + + while (is.good()) + { + is >> typ; + if (val.count(typ)) + { + is >> abb; + if (abb.size() > 0) common_abbr[abb] = val[typ]; + } + } + fb.close(); +} + +static char nextchar(const char *t, int i) +{ + while (isspace(t[i])) i++; + return t[i]; +} + +// Look for a token at or prior to the text position + +static int lookbehind(const char *t, int i, const char *s, int *tokflag) +{ + int k = (int) strlen(s) - 1; + + while (i > 0 && isspace(t[i])) i--; + + while (k >= 0 && i >= 0) + { + if (k > 0 && tokflag[i]) break; + + if (tolower(s[k]) != tolower(t[i])) + return -1; + k--; + i--; + } + + return (k < 0 && tokflag[i+1]) ? i + 1 : -1; +} + +// Look for a token at or following the text position + +static int lookahead(const char *t, int i, const char *s, int *tokflag) +{ + int k = 0; + + while (isspace(t[i])) i++; + + while (k < strlen(s) && i < strlen(t)) + { + if (k > 0 && tokflag[i]) break; + + if (tolower(s[k]) != tolower(t[i])) + return -1; + k++; + i++; + } + + return (k == strlen(s) && tokflag[i]) ? i - (int) strlen(s) : -1; +} + +// Set the initial tokens at spaces + +void MPtok::tok_0() +{ + int i; + + tokflag[0] = 1; + for (i = 1; i < text_len; i++) + { + tokflag[i] = isspace(text[i]) || (i > 0 && isspace(text[i - 1])) ? 1 : 0; + } + tokflag[i] = 1; +} + +// Get quotes preceded by open parens +// +// A double quote, preceded by a space or open bracket is a separate token +// + +void MPtok::tok_1() +{ + for (int i = 1; i < text_len; i++) + { + if (text[i] == '"' && strchr("([{<", text[i-1])) + { + tokflag[i] = 1; + if (i + 1 < text_len) tokflag[i+1] = 1; + } + } +} + +// Look for ellipses +// +// Three dots in a row is a separate token + +void MPtok::tok_2() +{ + for (int i = 1; i + 2 < text_len; i++) + { + if (strncmp(&text[i], "...", 3) == 0) + { + tokflag[i] = 1; + if (i + 3 < text_len) tokflag[i+3] = 1; + } + } +} + +// Non-sentence-ending punctuation +// +// Certain punctuation characters are separate tokens + +void MPtok::tok_3() +{ + for (int i = 0; i < text_len; i++) + { + // If it is a comma and the next char is not a space and option_comma = 0 + + if (option_comma == 0 && text[i] == ',' && isspace(text[i + 1]) == 0) + { + // do nothing + } else if (strchr(",;:@#$%&", text[i])) + { + tokflag[i] = 1; + tokflag[i + 1] = 1; + } + } +} + +// Separate the slashes +// +// Slashes are a separate token +// except for +/-, +/+, -/-, -/+, and and/or. + +void MPtok::tok_5_6_7() +{ + for (int i = 0; i < text_len; i++) + { + if (text[i] == '/') + { + tokflag[i] = 1; + if (i+1 < text_len) tokflag[i+1] = 1; + + // Put back +/-, etc, unless option_hyphen is 1 + + if (i - 1 >= 0 + && i + 1 < text_len + && ((option_new < 9 + && text[i - 1] == '+' || (text[i - 1] == '-' && option_hyphen == 0) + && text[i + 1] == '+' || (text[i + 1] == '-' && option_hyphen == 0)) + || (option_new >= 9 + && (text[i - 1] == '+' || text[i - 1] == '-') + && (text[i + 1] == '+' || text[i + 1] == '-')))) + { + tokflag[i - 1] = 1; + tokflag[i] = tokflag[i+1] = 0; + tokflag[i + 2] = 1; + } + + // Put back and/or, etc + + if (option_new <= 7) + { + if (i > 5 && strncmp(text + i - 5, " and/or ", 8) == 0) + { + for (int j = 1; j < 5; j++) + tokflag[i - 2 + j] = 0; + } + } else + { + if (i > 4 && strncmp(text + i - 4, " and/or ", 8) == 0) + { + for (int j = 1; j < 6; j++) + tokflag[i - 3 + j] = 0; + } + } + } + } +} + +// All brackets +// +// Any open or closed bracket is a separate token +// +// Exclamation and question mark +// +// Any question or exclamation mark is a separate token + +void MPtok::tok_8_9() +{ + for (int i = 0; i < text_len; i++) + { + if (strchr("[](){}<>", text[i]) + || strchr("?!", text[i])) + { + tokflag[i] = 1; + if (i + 1 < text_len) tokflag[i+1] = 1; + } + } +} + +// Period at the end of a string may be followed by closed-bracket or quote +// +// A period that is preceded by a non-period +// and optionally followed by a close paren +// and any amount of space at the end of the string +// is a separate token. + +void MPtok::tok_10() +{ + for (int i = text_len - 1; i >= 0; i--) + { + if (isspace(text[i])) continue; + if (strchr("])}>\"'", text[i])) continue; + if (text[i] != '.') break; + if (text[i] == '.' && (i - 1 < 0 || text[i-1] != '.')) + { + tokflag[i] = 1; + if (i + 1 < text_len) tokflag[i+1] = 1; + } + } +} + +// Period followed by a capitalized word +// +// A period preceded by a character that is not another period and not a space +// and followed by a space then an upper case letter is a separate token + +void MPtok::tok_11() +{ + for (int i = 0; i < text_len; i++) + { + if (text[i] == '.' + && (i + 1 < text_len && isspace(text[i+1])) + && (i - 1 < 0 || text[i - 1] != '.' || isspace(text[i-1]) == 0) + && isupper(nextchar(text, i + 1))) + tokflag[i] = 1; + } +} + +// A normal word followed by a period +// +// A period followed by a space +// and preceded by 2 or more alphabetic characters or hyphens +// is a separate token + +void MPtok::tok_12() +{ + int wcnt = 0; + + for (int i = 0; i < text_len; i++) + { + if (text[i] == '.' + && tokflag[i + 1] + && wcnt >= 2) + tokflag[i] = 1; + + if (isalpha(text[i]) || text[i] == '-') + ++wcnt; + else + wcnt = 0; + } +} + +// A non-normal token (that has no lower case letters) followed by a period +// +// A period at the end of a token made of characters excluding lower case +// is a separate token + +void MPtok::tok_13() +{ + int stok = 0; + int wcnt = 0; + + for (int i = 0; i < text_len; i++) + { + if (text[i] == '.' + && tokflag[i + 1] + && wcnt >= 2) + tokflag[i] = 1; + + if (tokflag[i] == 1) stok = 1; + + if (islower(text[i]) || text[i] == '.') + { + stok = 0; + wcnt = 0; + } + + if (stok) + wcnt++; + } +} + +// put some periods with single-letter abbreviations +// +// A single alphabetic token followed by a period followed +// by a token that does not begin with an upper case letter +// or number is taken to be an abbreviation and the period +// does not start a new token. +// +// NOTE: This does not recognize initials in people's names, +// that problem is not simply solved. + +void MPtok::tok_14() +{ + for (int i = 0; i < text_len; i++) + { + if (text[i] == '.' + && i - 1 >= 0 && isalpha(text[i - 1]) && tokflag[i - 1] + && tokflag[i + 1] + && isupper(nextchar(text, i + 1)) == 0 + && isdigit(nextchar(text, i + 1)) == 0 + && nextchar(text, i + 1) != '(' + ) + { + tokflag[i] = 0; + } + } +} + +void MPtok::tok_15() +{ + int i, j, k, a; + char buff[MAX_ABB + 1]; + + for (i = 0; i < text_len; i++) + { + // only start at a current token + + if (! tokflag[i]) continue; + + // find alphabetic followed by period + + buff[0] = '\0'; + for (k = 0; i + k < text_len && k < MAX_ABB; k++) + { + buff[k] = text[i+k]; buff[k+1] = '\0'; + if (k > 0 && buff[k] == '.') break; // this is good + if (! isalpha(buff[k])) { buff[0] = '\0'; break; } // this is not good + } + + if (buff[0] == '\0' || i + k == text_len || k == MAX_ABB) continue; + + // at this point, buff[k] == '.' add 1 to make it the length + + k++; + + // if not found, try finding a concatenated abbrev + + if (! common_abbr.count(buff)) + { + for (; i + k < text_len && k < MAX_ABB; k++) + { + buff[k] = text[i+k]; buff[k+1] = '\0'; + if (k > 0 && buff[k] == '.') break; // this is good + if (! isalpha(buff[k])) { buff[0] = '\0'; break; } // this is not good + } + + if (buff[0] == '\0' || i + k == text_len || k == MAX_ABB) continue; + + // at this point, buff[k] == '.' add 1 to make it the length + + k++; + } + + // if not found, give up + + if (! common_abbr.count(buff)) continue; + + if (common_abbr[buff] == ABB_NUM) + { + for (j = i + k; j < text_len && isspace(text[j]); j++) ; // next must be a number + if (! isdigit(text[j])) continue; // go to next abbreviation + } else if (common_abbr[buff] == ABB_EOS) + { + for (j = i + k; j < text_len && isspace(text[j]); j++) ; // if next token is upper case letter + if (isupper(text[j])) tokflag[i + (--k)] = 1; // tokenize the final period of this abbreviation + } + + // clear all token flags + + for (j = 1; j < k; j++) tokflag[i + j] = 0; + } +} + +// Check for common pairs that should not be considered sentence breaks + +void MPtok::tok_15_1() +{ + int i, j, k, tnum, p; + char buff[MAX_ABB + 1]; + + for (i = 0; i < text_len; i++) + { + if (! tokflag[i]) continue; + + // must be alphanumeric token followed by period token followed by space followed by alphanumeric token + + tnum = 0; + buff[0] = '\0'; + for (p = k = 0; i + k < text_len && k < MAX_ABB; k++) + { + buff[k] = text[i+k]; buff[k+1] = '\0'; + + if (isspace(buff[k])) + { + if (tnum == 2) break; // this is good + else if (tnum == 1) continue; // ok + else { buff[0] = '\0'; break; } // this shouldn't happen + } + + if (tokflag[i+k]) + { + if (tnum > 2) break; // done + else tnum++; + } + + if (tnum == 1 && buff[k] == '.') p = k; + if (tnum == 1 && buff[k] != '.') { buff[0] = '\0'; break; } // nope + if (! isalnum(buff[k])) { buff[0] = '\0'; break; } // nope + } + + if (buff[0] == '\0' || i + k == text_len || k == MAX_ABB) continue; + + // at this point buff is a potential pair, so untokenize the period, that's all + + if (common_pair.count(buff)) + tokflag[p] = 0; + } +} + +// Get cases where a space after a sentence has been omitted +// +// A period that occurs in a token consisting of alphabetic +// letters with a vowel to the left and the right is a +// separate token. + +void MPtok::tok_16() +{ + int j; + int has_vowel; + + for (int i = 0; i < text_len; i++) + { + if (text[i] == '.' && tokflag[i] == 0) + { + has_vowel = 0; + for (j = i - 1; j >= 0; --j) + { + if (isalpha(text[j]) == 0) + break; + if (strchr("aeiouAEIOU", text[j])) + has_vowel = 1; + if (tokflag[j]) + break; + } + if ((j >= 0 && tokflag[j] == 0) || has_vowel == 0) + continue; + + j = i + 1; + + has_vowel = 0; + for (; j < text_len && tokflag[j] == 0; ++j) + { + if (isalpha(text[j]) == 0) + break; + if (strchr("aeiouAEIOU", text[j])) + has_vowel = 1; + } + + if ((j < text_len && tokflag[j] == 0) || has_vowel == 0) + continue; + + tokflag[i] = 1; + tokflag[i + 1] = 1; + } + } +} + +// Correction to tok_16, +// Don't count if the token before is a single letter +// or the token following is a single letter other than 'a'. +// Also, don't count if the token to the right is gov, com, edu, etc. +// because those are web addresses! + +#define COMPLEX_WINDOW 40 + +enum {COMPLEX_NOT = 0, COMPLEX_YES, COMPLEX_DONE}; + +struct _complex { + int flag; + int offset; + const char *str; + int len; +} complex[] = { + COMPLEX_YES, 0, "complex", 7, + COMPLEX_NOT, 0, "complexi", 8, + COMPLEX_NOT, 0, "complexed", 9, + COMPLEX_NOT, 0, "complexa", 8, + COMPLEX_NOT, 0, "complex-", 8, + COMPLEX_NOT, 0, "complexl", 8, + COMPLEX_NOT, 0, "complexu", 8, + COMPLEX_NOT, -1, "-complex", 7, + COMPLEX_NOT, -2, "nocomplex", 9, + COMPLEX_NOT, -3, "subcomplex", 10, + COMPLEX_YES, 0, "hybrid", 6, + COMPLEX_NOT, 0, "hybridi", 7, + COMPLEX_NOT, 0, "hybrido", 7, + COMPLEX_NOT, 0, "hybrida", 7, + COMPLEX_NOT, 0, "hybrid-", 7, + COMPLEX_NOT, -1, "-hybrid", 7, + COMPLEX_YES, 0, "duplex", 6, + COMPLEX_NOT, -1, "oduplex", 7, + COMPLEX_DONE, 0, NULL, 0, +}; + +int MPtok::complex_check() +{ + int last_period = -2*COMPLEX_WINDOW; + int last_complex = -2*COMPLEX_WINDOW; + int i, j; + int complex_match; + + for (i = 0; i < text_len; i++) + { + if (text[i] == '.') + { + if (i - last_complex <= COMPLEX_WINDOW) + return 1; + last_period = i; + } + + complex_match = 0; + for (j = 0; complex[j].str; j++) + { + if (complex[j].flag == COMPLEX_NOT) + { + if (i + complex[j].offset >= 0 + && strncmp(text+i+complex[j].offset, complex[j].str, complex[j].len) == 0) + { + // don't match here + complex_match = 0; + } + } else if (complex[j].flag == COMPLEX_YES) + { + if (i + complex[j].offset >= 0 + && strncmp(text+i+complex[j].offset, complex[j].str, complex[j].len) == 0) + { + // match here + complex_match = 1; + } + } + } + + if (complex_match) + { + if (i - last_period <= COMPLEX_WINDOW) + return 1; + last_complex = i; + } + } + return 0; +} + +void MPtok::tok_16_1() +{ + int i, j; + char v1, v2; + int c1, c2; + + if (option_new == 3 && strstr(text, "complex")) + return; + + if (option_new >= 4 && complex_check()) + return; + + for (i = 0; i < text_len; i++) + { + if (text[i] == '.' && tokflag[i] == 0) + { + char suffix[10]; + int s_i; + + v1 = '\0'; + c1 = 0; + for (j = i - 1; j >= 0; --j) + { + if (isalpha(text[j]) == 0) + break; + if (strchr("aeiouAEIOU", text[j])) + v1 = tolower(text[j]); + c1++; + if (tokflag[j]) + break; + } + if ((j >= 0 && tokflag[j] == 0) + || v1 == '\0' + || c1 == 1) + continue; + + j = i + 1; + + v2 = '\0'; + c2 = 0; + s_i = 0; + for (; j < text_len && tokflag[j] == 0; ++j) + { + if (isalpha(text[j]) == 0) + break; + if (strchr("aeiouAEIOU", text[j])) + v2 = tolower(text[j]); + if (s_i < 3) + suffix[s_i++] = tolower(text[j]); suffix[s_i] = '\0'; + c2++; + } + + if ((j < text_len && tokflag[j] == 0) + || v2 == '\0' + || (c2 == 1 && v2 != 'a') + || (c2 == 3 && tokflag[j] == 1 && s_i == 3 + && (strcmp(suffix, "gov") == 0 + || strcmp(suffix, "edu") == 0 + || strcmp(suffix, "org") == 0 + || strcmp(suffix, "com") == 0))) + continue; + + tokflag[i] = 1; + tokflag[i + 1] = 1; + } + } +} + + +// Numeric endings of sentences +// +// A period after a numeric token followed by a token that starts +// with an alphabetic character, is a separate token. +// +// This should be covered already by tok_13 + +void MPtok::tok_17() +{ + int j; + + for (int i = 0; i < text_len; i++) + { + if (text[i] == '.' + && tokflag[i] == 0 + && tokflag[i + 1]) + { + for (j = i - 1; j >= 0 && isdigit(text[j]) && tokflag[j] == 0; --j) + ; + if (j >= 0 && j < i - 1 && tokflag[j] && isalpha(nextchar(text, i + 1))) + tokflag[i] = 1; + } + } +} + +// period at end of string is a token + +void MPtok::tok_20() +{ + for (int i = text_len - 1; i >= 0; --i) + { + if (isspace(text[i])) + continue; + + if (strchr(".!?", text[i])) + tokflag[i] = 1; + + break; + } +} + +// a period that follows a non-common word, and that is +// followed by a lower case common word is probably not a token + +void MPtok::tok_20_1() +{ + int j; + + for (int i = 0; i < text_len; ++i) + { + if (text[i] == '.' && tokflag[i] == 1) + { + int tcnt, lcnt, ocnt; + tcnt = lcnt = ocnt = 0; + + // make sure the previous word was *not* common + + for (j = i - 1; j >= 0; j--) + { + if (isspace(text[j])) continue; + if (option_new >= 2) + { + if (islower(text[j]) == 0 && text[j] != '-') ocnt++; + } else + { + if (! islower(text[j])) ocnt++; + } + + if (tokflag[j] || j == 0) + { + if (ocnt == 0) + { + goto nexti; + } + break; + } + } + + tcnt = lcnt = ocnt = 0; + + // make sure the next word is common + + for (j = i + 1; j < text_len; j++) + { + if (isspace(text[j])) continue; + if (tokflag[j]) tcnt++; + + if (tcnt == 2 || j == text_len - 1) + { + if (lcnt > 0 && ocnt == 0) tokflag[i] = 0; + break; + } + + if (islower(text[j])) lcnt++; + else ocnt++; + } + } +nexti: ; + } +} + +// tokenized period followed by non-space other than close paren +// is not a token + +void MPtok::tok_20_2() +{ + int j; + + for (int i = 0; i < text_len - 1; ++i) + { + if (text[i] == '.' && tokflag[i] == 1 + && strchr(" ()[]\"\'\n\t\r", text[i+1]) == 0) + { + tokflag[i] = 0; + } + } +} + + +// long dash +// +// A pair of hyphens is a complete token + +void MPtok::tok_21() +{ + for (int i = 0; i + 1 < text_len; i++) + { + if (strncmp(&text[i], "--", 2) == 0) + { + tokflag[i] = 1; + if (i + 2 < text_len) + { + i += 2; + tokflag[i] = 1; + } + } + } +} + +// hyphens +// +// If specified as an option, a hyphen between letters is a complete token + +void MPtok::tok_21a() +{ + if (option_hyphen == 0) return; + + for (int i = 0; i + 1 < text_len; i++) + { + if (text[i] == '-' + && (i == 0 || text[i-1] != '-') + && text[i+1] != '-') + { + tokflag[i] = 1; + tokflag[i+1] = 1; + } + } +} + + +// quote +// +// Any double quote is a separate token + +void MPtok::tok_22() +{ + for (int i = 0; i < text_len; i++) + { + if (text[i] == '"') + { + tokflag[i] = 1; + if (i + 1 < text_len) + { + i += 1; + tokflag[i] = 1; + } + } + } +} + +// possessive +// +// Any single quote at the end of a token that is not +// preceded by a single quote is a separate token + +void MPtok::tok_23() +{ + for (int i = 0; i < text_len; i++) + { + if (text[i] == '\'' + && (i - 1 >= 0 && text[i - 1] != '\'') + && tokflag[i + 1]) + { + tokflag[i] = 1; + } + } +} + + +// quote +// +// If a single quote starts a token, or is preceded by a +// single quote, and followed by a character +// that is not a single quote, then +// the character to it's right is the start of a new token + +void MPtok::tok_24() +{ + for (int i = 0; i < text_len; i++) + { + if (text[i] == '\'' + && (tokflag[i] == 1 || (i - 1 >= 0 && text[i - 1] == '\'')) + && (i + 1 < text_len && text[i + 1] != '\'')) + { + tokflag[i + 1] = 1; + } + } +} + +// put back possessive +// +// A single quote that is a whole token followed by a lower case s +// that is also a whole token (without space between them) +// should be merged into a single token + +void MPtok::tok_25() +{ + for (int i = 0; i < text_len; i++) + { + if (text[i] == '\'' + && tokflag[i] == 1 + && i + 1 < text_len && text[i + 1] == 's' + && tokflag[i+1] == 1 + && (i + 2 >= text_len || isspace(text[i + 2]) || tokflag[i + 2] == 1)) + { + tokflag[i + 1] = 0; + } + } +} + +// quote +// +// A pair of single quotes is a separate token + +void MPtok::tok_26() +{ + for (int i = 0; i < text_len; i++) + { + if (strncmp(&text[i], "''", 2) == 0 + || strncmp(&text[i], "``", 2) == 0) + { + tokflag[i] = 1; + if (i + 2 < text_len) tokflag[i + 2] = 1; + } + } +} + +// possessive +// +// A single quote followed by a letter s is a possessive + +void MPtok::tok_27() +{ + for (int i = 0; i < text_len; i++) + { + if (text[i] == '\'' + && i + 1 < text_len + && tolower(text[i + 1]) == 's' + && (i + 2 >= text_len || tokflag[i + 2])) + { + tokflag[i] = 1; + } + } +} + +// split "cannot" to "can not" +// +// A single token that is the word cannot (in any case) +// is split into two words + +void MPtok::tok_28() +{ + for (int i = 0; i < text_len; i++) + { + if ((strncmp(&text[i], "cannot", 6) == 0 + || strncmp(&text[i], "Cannot", 6) == 0) + && tokflag[i + 6]) + { + tokflag[i + 3] = 1; + } + } +} + +// put list item elements back at sentence end +// +// A period that is preceded by an alphanumeric (no space) +// and any amount of preceding space and an end-mark +// stays with the alphanumeric. + +void MPtok::tok_29() +{ + int j; + + for (int i = 0; i < text_len; i++) + { + if (text[i] == '.' + && tokflag[i] && tokflag[i + 1] + && i - 1 >= 0 && isalnum(text[i - 1]) + && tokflag[i - 1] + && ((j = lookbehind(text, i-2, ".", tokflag)) >= 0 + || (j = lookbehind(text, i-2, "?", tokflag)) >= 0 + || (j = lookbehind(text, i-2, "!", tokflag)) >= 0) + && tokflag[j]) + { + tokflag[i] = 0; + } + } +} + +// attach list elements to the beginnings of their sentences +// this means, attach the period to the list element +// +// a list element is a single letter or a one or two digits +// which is preceded by an end of sentence ".!?;" +// or colon (provided it doesn't belong to a proportion construct) + +void MPtok::tok_29a() +{ + int i, j; + + for (i = 0; i < text_len; i++) + { + if (text[i] == '.' && tokflag[i]) + { + // Look back, make sure the token before the period + // is either single alphanumeric, or at most a two digit number + // and the character before that is a punctuation ".?!:," + + int tcnt, acnt, dcnt, pcnt, ocnt, scnt; + tcnt = acnt = dcnt = pcnt = ocnt = scnt = 0; + char p; + + for (j = i - 1; j >= 0; j--) + { + if (isspace(text[j])) { scnt++; continue; } + else if (tcnt == 0 && isalpha(text[j])) ++acnt; + else if (tcnt == 0 && isdigit(text[j])) ++dcnt; + else if (tcnt == 1 && strchr(".!?:;,", text[j])) { pcnt++; p = text[j]; } + else ocnt++; + + if (tokflag[j] || j == 0) + { + tcnt++; + if (tcnt == 1 && ocnt == 0 && scnt == 0 + && ((acnt == 1 && dcnt == 0) || (acnt == 0 && dcnt > 0 && dcnt <= 2))) + { + // This is acceptable + } else if (tcnt == 2 && pcnt <= 1 && ocnt == 0 && scnt > 0) + { + if (p == ':') + { + while (--j >= 0 && isspace(text[j])) + ; + if (j >= 0 && isdigit(text[j])) + { + // It's probably a proportion + break; + } + } + // Jackpot + tokflag[i] = 0; + } else + { + // This is not + break; + } + scnt = 0; + } + } + } + } +} + +// list elements at the beginning of a string +// +// An alphanumeric token followed by a period +// at the beginning of the line stays with the +// alphanumeric + +void MPtok::tok_30() +{ + int i = 0; + + while (isspace(text[i])) i++; + + if (isalnum(text[i]) + && tokflag[i] + && i + 1 < text_len + && text[i + 1] == '.' + && tokflag[i + 1]) + { + tokflag[i + 1] = 0; + } +} + +// process American style numbers + +void MPtok::tok_31() +{ + int j; + + for (int i = 0; i < text_len; i++) + { + if (text[i] == ',' + && i + 3 < text_len + && tokflag[i] && tokflag[i + 1] + && isdigit(text[i + 1]) + && isdigit(text[i + 2]) + && isdigit(text[i + 3]) + && i - 1 >= 0 && isdigit(text[i - 1]) + ) + { + tokflag[i] = 0; + tokflag[i + 1] = 0; + } + } +} + +// process British style numbers + +void MPtok::tok_32() +{ + int j; + + for (int i = 0; i < text_len; i++) + { + if (text[i] == ' ' + && i + 3 < text_len + && tokflag[i] && tokflag[i + 1] + && isdigit(text[i + 1]) + && isdigit(text[i + 2]) + && isdigit(text[i + 3]) + && i - 1 >= 0 && isdigit(text[i - 1]) + ) + { + tokflag[i] = 0; + tokflag[i + 1] = 0; + } + } +} + +// tokenize unicode escapes +// +// Added + +void MPtok::tok_33() +{ + int j; + + for (int i = 0; i < text_len; i++) + { + if (text[i] == '&') + { + if (text[i + 1] == '#') + { + for (j = i + 2; isdigit(text[j]); j++) + ; + } else + { + for (j = i + 1; isalpha(text[j]); j++) + ; + } + + if (text[j] == ';') + { + // Tokenize the escape, untokenize everything inside + + tokflag[i] = 1; + for (i++; i <= j; i++) tokflag[i] = 0; + tokflag[i] = 1; + } + } + } +} + +// Remove tags if they are present + +void MPtok::tok_un() +{ + int untok = 0; + for (int i = 0; text[i]; ++i) + { + if (isspace(text[i])) untok = 0; + if (text[i] == option_tagsep) untok = 1; + if (untok) text[i] = ' '; + } +} + + +void MPtok::set_tokflag() +{ + int i; + + tok_0(); + tok_1(); + tok_2(); + tok_3(); + + // step 4 replaces tag char, this is done at output + + tok_5_6_7(); + tok_8_9(); + + tok_10(); + tok_11(); + if (option_new >= 1) + { + tok_21(); + tok_21a(); + tok_22(); + tok_23(); + tok_24(); + tok_25(); + tok_26(); + tok_27(); + } + tok_12(); + tok_13(); + tok_14(); + if (option_new <= 5) + tok_15(); + if (option_new < 2) + tok_16(); + tok_17(); + + // steps 18 and 19 recognize periods within parens, + // and this is moved to the segmentation section + + tok_20(); + if (option_new >= 1) + { + tok_20_1(); + tok_20_2(); + if (option_new >= 2) + tok_16_1(); + if (option_new >= 6) + tok_15(); + if (option_new >= 7) + tok_15_1(); + } + if (option_new < 1) + { + tok_21(); + tok_21a(); + tok_22(); + tok_23(); + tok_24(); + tok_25(); + tok_26(); + tok_27(); + } + tok_28(); + if (option_new >= 1) + tok_29a(); + else + tok_29(); + tok_30(); + tok_31(); + tok_32(); + + tok_33(); +} + +/* set_endflag +** +** After tokflag has been set, find the possible sentence endings. +*/ + +void MPtok::set_endflag() +{ + int i; + + // The following tests look for end-stops and label them. + // They include steps 18 and 19 + + for (i = 0; i <= text_len; i++) + endflag[i] = 0; + + // Count the number of unmatched parens + + int up = 0; // unmatched round parens + int ub = 0; // unmatched brackets + + for (i = 0; i < text_len; i++) + { + if (text[i] == '(') ++up; + if (text[i] == ')') --up; + if (text[i] == '[') ++ub; + if (text[i] == ']') --ub; + if (up < 0) up = 0; + if (ub < 0) ub = 0; + } + + // Now find the end-of-sentence marks + + // tok_18: periods within parentheses, allow for nesting + // tok_19: periods within brackets, allow for nesting + // the perl version solves this by putting the period + // back with the previous token, but a better solution + // is to allow it to be tokenized but just don't + // allow it to be an end-of-sentence. + // Therefore, these are moved to the segmentation + // section + + int p = 0; // round parens + int b = 0; // brackets + + for (i = 0; i < text_len; i++) + { + if (text[i] == '(') ++p; + if (text[i] == ')') --p; + if (text[i] == '[') ++b; + if (text[i] == ']') --b; + if (p < 0) p = 0; + if (b < 0) b = 0; + + if (strchr(".!?", text[i]) + && tokflag[i] + && tokflag[i + 1]) + { + if (option_segment && p <= up && b <= ub) + endflag[i] = 1; + + // This is optional to join periods with + // probable abbreviations + + if (p > up || b > ub) + tokflag[i] = 0; + } + } + + // endtokens followed by a single or double quote, which matches + // a single or double quote in the previous sentence + + if (option_new >= 1) + { + int dquo, squo; + dquo = squo = 0; + + for (i = 0; i < text_len; i++) + { + if (text[i] == '"') dquo = ! dquo; + else if (text[i] == '\'') squo = ! squo; + else if (endflag[i]) + { + if ((text[i+1] == '"' && dquo) || (text[i+1] == '\'' && squo)) + { + endflag[i] = 0; + + // But don't end at all if the next token is something + // other than an upper case letter. + + if (option_new >= 2) + { + int j; + int ok = 0; + + for (j = i + 2; j < text_len; j++) + { + if (isspace(text[j])) continue; + // if (isupper(text[j])) + if (isupper(text[j]) || text[j] == '(') + { + ok = 1; + break; + } + if (tokflag[j]) break; + } + + if (ok) + endflag[i+1] = 1; + } else + { + endflag[i+1] = 1; + } + } + dquo = squo = 0; + } + } + } +} + + +/* set_endflag_01 +** +** After tokflag has been set, find the possible sentence endings. +** This has improved paren matching. +*/ + +#define MAX_MATCH 500 // Maximum length to get a paren match + +void MPtok::set_endflag_01() +{ + int match[text_len]; + int i, j; + + // The following tests look for end-stops and label them. + // They include steps 18 and 19 + + for (i = 0; i <= text_len; i++) + endflag[i] = 0; + + for (i = 0; i < text_len; i++) + match[i] = 0; + + for (i = text_len - 1; i >= 0; i--) + { + if (text[i] == '(' || text[i] == '[') + { + for (j = i + 1; text[j] && j - i <= MAX_MATCH; j++) + { + // Skip parens that are already matched + + if (match[j] > j) + { + j = match[j]; + continue; + } + + // Look for a matching close paren + + if (match[j] == 0 + && ((text[i] == '(' && text[j] == ')') + || (text[i] == '[' && text[j] == ']'))) + { + match[i] = j; + match[j] = i; + break; + } + } + } + } + + int next_match = 0; + for (i = 0; i < text_len; i++) + { + if (match[i] > next_match) + next_match = match[i]; + + if (strchr(".!?", text[i]) + && tokflag[i] + && tokflag[i + 1] + && (option_new <= 4 || option_doteos == 1 || (i > 0 && isspace(text[i-1]) == 0))) + { + if (i <= next_match) + tokflag[i] = 0; + else if (option_segment) + endflag[i] = 1; + } + } + + // endtokens followed by a single or double quote, which matches + // a single or double quote in the previous sentence + + int dquo, squo; + dquo = squo = 0; + + for (i = 0; i < text_len; i++) + { + if (option_new <= 7 && text[i] == '"') dquo = ! dquo; + else if (option_new >= 8 && text[i] == '"' && tokflag[i] && tokflag[i+1]) dquo = ! dquo; + else if (option_new <= 7 && text[i] == '\'') squo = ! squo; + else if (option_new >= 8 && text[i] == '\'' + && tokflag[i] && (tokflag[i+1] || (text[i+1] == '\'' && tokflag[i+2]))) squo = ! squo; + else if (endflag[i]) + { + if ((text[i+1] == '"' && dquo) || (text[i+1] == '\'' && squo)) + { + endflag[i] = 0; + + // But don't end at all if the next token is something + // other than an upper case letter. + + if (option_new >= 2) + { + int j; + int ok = 0; + + for (j = i + 2; j < text_len; j++) + { + if (isspace(text[j])) continue; + // if (isupper(text[j])) + if (isupper(text[j]) || text[j] == '(') + { + ok = 1; + break; + } + if (tokflag[j]) break; + } + + if (ok) + endflag[i+1] = 1; + } else + { + endflag[i+1] = 1; + } + } + dquo = squo = 0; + } + } +} + + +// Size buffer: return the size of the buffer required to hold all of the tokenized text. +// It can be simply estimated by a formula that depends only on the length of text and number of tokens. + +int MPtok::size_buff() +{ + int size = 1; // Start with null terminator + int t = option_pretag.size(); // for each tag, the length of the UNTAG string + + if (t <= 0) t = 1; // Make sure there is at least one + t += 2; // Add one for underscore and one for space + + for (int i = 0; i < text_len; i++) + { + size++; // Count all characters + if (tokflag[i]) size += t; // Count token delimiters (may overcount) + if (endflag[i]) size++; // Add one for newline + } + return size; +} + + +/* append_token +** +** Save a single token to a buffer. +*/ + +void MPtok::append_token(string& buff, int& sp, char *tok, int ef) +{ + // Convert tag separator chars and back quotes (?) + + for (int i = 0; tok[i]; i++) + { + if (tok[i] == option_tagsep) tok[i] = option_replacesep; + if (tok[i] == '`') tok[i] = '\''; + } + + // Skip whitespace if tokens are being output + // Otherwise, skip whitespace at the start of a sentence + + if (option_token || ! sp) while (isspace(*tok)) ++tok; + + // Save the token + + if (strlen(tok) > 0) + { + // Add delimiter if needed + + if (option_token && sp) buff += ' '; + + // Append token to output + + if (option_new < 9) + { + while (*tok && (! option_token || ! isspace(*tok))) + buff += *(tok++); + } else + { + while (*tok) + buff += *(tok++); + } + + sp = 1; + + // Add tag holders + + if (option_token && option_pretag.size() > 0) + { + buff += option_tagsep; + buff += option_pretag; + } + + // If it was end of sentence, then add newline + + if (ef) + { + buff += '\n'; + sp = 0; + } + } +} + +// Strip whitespace after sentences + +static void adjust_space(string& buff) +{ + while (buff.size() > 0 && isspace(buff[0])) buff.erase(0, 1); + + // delete two spaces in a row, but keep newlines + + for (int i = 1; i < buff.size(); i++) + { + if (isspace(buff[i]) && isspace(buff[i-1])) + buff.erase((buff[i] == '\n')?(--i):(i--), 1); + } + + for (int i = buff.size() - 1; i >= 0 && isspace(buff[i]); i--) + buff.erase(i, 1); +} + +/* token_string +** +** After the tokflag and endflag have been set, copy the tokens to the buffer. +*/ + +string MPtok::token_string() +{ + string buff; + + int i; + + // Move token starts to non-whitespace chars + + int last_tok = 0; + for (i = 0; i < text_len; i++) + { + if (tokflag[i] == 1 && isspace(text[i])) + { + tokflag[i] = 0; + last_tok = 1; + } else if (isspace(text[i]) == 0 && last_tok) + { + tokflag[i] = 1; + last_tok = 0; + } + } + + // Extract the tokens and print them out now + + char *tok = new char[text_len + 1]; + int pos = 0; + int sp = 0; + int ef = 0; + + tok[pos] = '\0'; + + for (i = 0; i <= text_len; i++) + { + // The start of a new token + + if (tokflag[i]) + { + // Print the current token + + append_token(buff, sp, tok, ef); + + // Start a new token + + pos = 0; + tok[pos] = '\0'; + + ef = 0; + } + + // Append to the current token + + tok[pos++] = text[i]; + tok[pos] = '\0'; + + // If any of the characters in the token are endflagged, + // Then pass this information along for end-of-sentence + + if (endflag[i]) ef = 1; + } + + // Print the last token + + append_token(buff, sp, tok, ef); + + delete[] tok; + + // Adjust the end of sentence boundaries + + adjust_space(buff); + + return buff; +} + +void MPtok::map_escapes() +{ + char *s; + int j, k, ch; + char buff[10]; + + k = 0; + for (int i = 0; text[i]; i++) + { + if (text[i] == '&' && text[i + 1] == '#') + { + for (s = &buff[0], j = 2; j <= 4 && i + j < text_len && isdigit(text[i + j]); j++) + *s++ = text[i + j]; + *s = '\0'; + ch = atoi(buff); + if (strlen(buff) > 0 && text[i + j] == ';' && ch > 0 && ch <= 256) + { + text[k] = ch; + if (! text[k]) text[k] = ' '; + k++; + i = i + j; + continue; + } + } + text[k++] = text[i]; + } + text[k] = '\0'; + text_len = k; +} + +MPtok::MPtok(string idir, const string& cnam) +{ + tok_initialized = 0; + + if (idir.size() == 0) + { + char *p = getenv("MEDPOST_HOME"); + if (p && strlen(p)) + { + idir = p; + + int found = idir.find("="); + if (found != string::npos) + idir = idir.substr(found + 1); + } + } + + + if (idir.size() == 0) + { + char buff[1000]; + FILE *fp = fopen("path_medpost", "r"); + if (fp) + { + if (fgets(buff, 1000, fp)) + { + chomp(buff); + idir = &buff[0]; + } + fclose(fp); + } + } + + if (idir.size() == 0) + idir = "/home/natxie/CPP64/lib/FIXED_DATA/"; + + option_dir = idir; + + option_token = 1; + option_segment = 1; + option_hyphen = 0; + option_comma = 1; + option_pretok = 0; + option_new = MPTOK_VERSION; + option_doteos = 0; + + if (cnam.size() > 0) + { + option_cnam = "_"; + option_cnam += cnam; + } + + init(); +} + +void MPtok::init(void) +{ + if (tok_initialized) return; + + string fname; + + fname = option_dir + "/medpost" + option_cnam + ".pairs"; + init_pair(fname); + + fname = option_dir + "/medpost" + option_cnam + ".abbr"; + init_abbr(fname); + + tok_initialized = 1; +} + +MPtok::~MPtok() +{ +} + +// Global tokenizer + +string MPtok::tokenize(const string& txt, int mt) +{ + if (option_pretok) return save_string(txt); + + option_token = mt; + text_len = txt.size(); + if (text_len == 0) return string(""); + + text = new char[text_len + 1]; + strcpy(text, txt.c_str()); + + map_escapes(); + + if (text_len == 0) return NULL; + + tokflag = new int[text_len + 1]; + endflag = new int[text_len + 1]; + + set_tokflag(); + if (option_new < 3) + set_endflag(); + else + set_endflag_01(); + + string buff = token_string(); + save_string(buff); + + delete[] text; text = NULL; + delete[] tokflag; tokflag = NULL; + delete[] endflag; endflag = NULL; + + return buff; +} + +string MPtok::tokenize(const string& text) +{ + return tokenize(text, 1); +} + +string MPtok::segment(const string& text) +{ + sent.clear(); + + // tokenize the text + + int save_option_segment = option_segment; + option_segment = 1; + string buff = tokenize(text, 0); + option_segment = save_option_segment; + + if (buff.size() == 0) return text; + + int found = 0; + int pos = 0; + + while (pos < buff.size()) + { + found = buff.find('\n', pos); + if (found == string::npos) + { + sent.push_back(buff.substr(pos)); + pos = buff.size(); + } else + { + sent.push_back(buff.substr(pos, found - pos)); + pos = found + 1; + } + } + + return buff; +} + +string MPtok::save_string(const string& s) +{ + stringstream ss (stringstream::in | stringstream::out); + string w, t; + int found; + string ret; + + word.clear(); + tag.clear(); + + ss << s; + while (ss.good()) + { + ss >> w; + if (w.size() == 0) break; + + found = w.find('_'); + + if (found != string::npos) + { + t = w.substr(found + 1); + w.resize(found); + word.push_back(w); + tag.push_back(t); + } else + { + word.push_back(w); + tag.push_back(option_pretag); + + } + if (ret.size() > 0) ret += " "; + ret += w; + } + + // now look for continuation tags... + + for (int i = 0; i < word.size(); i++) + { + int j = tag[i].size() - 1; + if (j >= 0 && tag[i][j] == '+' && i < tag.size() - 1) + { + word[i] = word[i] + " " + word[i + 1]; + tag[i] = tag[i + 1]; + word.erase(word.begin() + i + 1, word.begin() + i + 2); + tag.erase(tag.begin() + i + 1, tag.begin() + i + 2); + i--; + } + } + + return ret; +} + + +static int count_words(const char *s) +{ + int i; + + i = 1; + for (; *s; ++s) + { + if (*s == ' ') ++i; + } + return i; +} + +static void print_word(const char *s, int i) +{ + for (; i > 0 && *s; ++s) { if (*s == ' ') --i; } + while (*s && *s != ' ') { printf("%c", *s); ++s; } +} + +void MPtok::print(int how) +{ + int i, j, w; + + if (how != 0 && how != 2) + { + printf("print(%d) not defined\n", how); + return; + } + + for (i = 0; i < word.size(); ++i) + { + // Get the words from an idiom + + for (w = 0; w < count_words(word[i].c_str()); ++w) + { + if (how == 2 && i + w > 0) printf(" "); + + print_word(word[i].c_str(), w); + + if (how == 0) + { + printf(" tagged %s", tag[i].c_str()); + if (w < count_words(word[i].c_str()) - 1) printf("+"); + printf("\n"); + } else if (how == 2) + { + printf("%s%s", "_", tag[i].c_str()); + if (w < count_words(word[i].c_str()) - 1) printf("+"); + } + } + } + if (how == 2) + printf("\n"); +} + +void MPtok::merge_words(int s, int n) +{ + string tmp = word[s]; + + for (int i = s + 1; i < s + n; i++) + { + tmp += " "; + tmp += word[i]; + } + + // printf("merging words : '%s' n = %d\n", tmp.c_str(), n); + + for (int k = s; k + n < word.size(); k++) + { + word[k+1] = word[k+n]; + tag[k+1] = tag[k+n]; + } + + // Fixup the remaining array + + word.resize(word.size() - n + 1); + tag.resize(word.size()); + + word[s] = tmp; +} + +void MPtok::split_words() +{ + for (int i = 0; i < word.size(); i++) + { + int found = word[i].find(' '); + + if (found != string::npos) + { + string tmp1(word[i], 0, found); + string tmp2(word[i], found + 1, string::npos); + + // Move all the words and tags down + + word.resize(word.size() + 1); + tag.resize(tag.size() + 1); + + for (int j = word.size() - 1; j > i; j--) + { + word[j] = word[j - 1]; + tag[j] = tag[j - 1]; + } + + word[i] = tmp1; + tag[i] = tag[i+1]; + tag[i] += "+"; + + word[i+1] = tmp2; + } + } +} + +// Callable functions to set internal options + +void MPtok::set_segment(int i) { option_segment = i; } +void MPtok::set_hyphen(int i) { option_hyphen = i; } +void MPtok::set_comma(int i) { option_comma = i; } +void MPtok::set_pretag(char *a) { option_pretag = a; } +void MPtok::set_pretok(int i) { option_pretok = i; } +void MPtok::set_new(int i) { option_new = i; } +void MPtok::set_doteos(int i) { option_doteos = i; } diff --git a/Library/MPtok.h b/Library/MPtok.h index ef3e98efa6e78e7d719c4f518e03ad0e8d13d0ff..0c3634a4bd155a00debb2de6ba1329f5ea672ccb 100644 --- a/Library/MPtok.h +++ b/Library/MPtok.h @@ -1,141 +1,141 @@ -#ifndef _MPTOK_H -#define _MPTOK_H - -#include - -#include -#include -#include -#include - -using namespace std; - -#define MPTOK_VERSION 11 // The latest version - -// Maximum number of words in a sentence - -#define MAX_WORDS 10000 - -enum { ABB_ABB, ABB_EOS, ABB_NUM }; -#define MAX_ABB 100 - -/*! \brief A class to perform tokenization. - * - * The MPtag class can be used to perform tokenization and segmentation - * of strings into tokens or sentences. It is inherited and used by MPtag - * so if the user is only interested in tagging, this class does not - * need to be referenced. - */ - -class MPtok -{ -public: - /// \brief A MPtok object, giving the install directory \p idir where data files can be found - MPtok(string idir = "", const string& cnam = ""); - ~MPtok(); - - void init(); // Initialize (call only once) - void init(const string& idir) { option_dir = idir; init(); } // Initialize using specified install directory - - string option_pretag; // The tag to use on tokens - int option_segment; // Segment into sentences - int option_hyphen; // Hyphens are separate tokens - int option_comma; // Commas are always tokenized - int option_pretok; // The text is pre-tokenized - int option_new; // Use new algorithms, used in development only - int option_doteos; // If " . " occurs, it's an end EOS (new >= 5) - - void set_segment(int i); ///< \brief Sentences are broken up during tokenization (default 1) - void set_token(int i); ///< \brief Break tokens apart with white space (default 1) - void set_hyphen(int i); ///< \brief Hyphens are separate tokens (default 0) - void set_comma(int i); ///< \brief Commas are separate tokens (default 1) - void set_pretag(char *a); ///< \brief Use this tag on all tokens (default empty string) - void set_pretok(int i); ///< \brief Assume string is already tokenized using spaces (default 0) - void set_new(int i); ///< \brief Use a previous algorithm (defaults to most recent) - void set_doteos(int i); ///< \brief Ignore abbreviations, and always assume a period ends a sentence (default 0) - - void merge_words(int s, int e); // merge words between s and e (idiom) - void split_words(void); // split all merged words - - string tokenize(const string&); ///< \brief Tokenize, save (in \p word), and return space delimited tokens - string segment(const string&); ///< \brief Segment, save (in \p sent), and return newline delimited sentences - - string save_string(const string&); // save a buffer - string tokenize_nosave(const string&); // tokenize without saving - string tokenize(const string&,int); // do tokenization with or without inserting spaces between them - - void print(int); ///< \brief Print tokens/tags with given verbosity - - vector word; ///< \brief Vector of words (tokens) of most recently tagged (or tokenized) text - vector tag; ///< \brief Vector of tags of most recently tagged (or tokenized) text - vector sent; ///< \brief Vector of sentences of most recently sentence-segmented text - - char *text; // Input text arg - int text_len; // It's length - int *tokflag; // token flags - int *endflag; // end-sentence flags - - string option_cnam; // A suffix, for opening variant support files - string option_dir; // Directory to find things - -protected: - - void set_tokflag(); - void set_endflag(); - void set_endflag_01(); - int size_buff(); - - void init_pair(const string& file_name); // read a file of common pairs - void init_abbr(const string& file_name); // read a file of abbreviations - - void tok_0(); - void tok_1(); - void tok_2(); - void tok_3(); - void tok_5_6_7(); - void tok_8_9(); - void tok_10(); - void tok_11(); - void tok_12(); - void tok_13(); - void tok_14(); - void tok_15(); - void tok_15_1(); - void tok_16(); - void tok_16_1(); - void tok_17(); - void tok_20(); - void tok_20_1(); - void tok_20_2(); - void tok_21(); - void tok_21a(); - void tok_22(); - void tok_23(); - void tok_24(); - void tok_25(); - void tok_26(); - void tok_27(); - void tok_28(); - void tok_29(); - void tok_29a(); - void tok_30(); - void tok_31(); - void tok_32(); - void tok_33(); - int complex_check(); - void map_escapes(); - void tok_un(); - - void append_token(string&, int&, char*, int); - string token_string(); - - set common_pair; - map common_abbr; - -private: - int option_token; // Output tokenized text (only use internally) - int tok_initialized; // is it inited? -}; - -#endif - +#ifndef _MPTOK_H +#define _MPTOK_H + +#include + +#include +#include +#include +#include + +using namespace std; + +#define MPTOK_VERSION 11 // The latest version + +// Maximum number of words in a sentence + +#define MAX_WORDS 10000 + +enum { ABB_ABB, ABB_EOS, ABB_NUM }; +#define MAX_ABB 100 + +/*! \brief A class to perform tokenization. + * + * The MPtag class can be used to perform tokenization and segmentation + * of strings into tokens or sentences. It is inherited and used by MPtag + * so if the user is only interested in tagging, this class does not + * need to be referenced. + */ + +class MPtok +{ +public: + /// \brief A MPtok object, giving the install directory \p idir where data files can be found + MPtok(string idir = "", const string& cnam = ""); + ~MPtok(); + + void init(); // Initialize (call only once) + void init(const string& idir) { option_dir = idir; init(); } // Initialize using specified install directory + + string option_pretag; // The tag to use on tokens + int option_segment; // Segment into sentences + int option_hyphen; // Hyphens are separate tokens + int option_comma; // Commas are always tokenized + int option_pretok; // The text is pre-tokenized + int option_new; // Use new algorithms, used in development only + int option_doteos; // If " . " occurs, it's an end EOS (new >= 5) + + void set_segment(int i); ///< \brief Sentences are broken up during tokenization (default 1) + void set_token(int i); ///< \brief Break tokens apart with white space (default 1) + void set_hyphen(int i); ///< \brief Hyphens are separate tokens (default 0) + void set_comma(int i); ///< \brief Commas are separate tokens (default 1) + void set_pretag(char *a); ///< \brief Use this tag on all tokens (default empty string) + void set_pretok(int i); ///< \brief Assume string is already tokenized using spaces (default 0) + void set_new(int i); ///< \brief Use a previous algorithm (defaults to most recent) + void set_doteos(int i); ///< \brief Ignore abbreviations, and always assume a period ends a sentence (default 0) + + void merge_words(int s, int e); // merge words between s and e (idiom) + void split_words(void); // split all merged words + + string tokenize(const string&); ///< \brief Tokenize, save (in \p word), and return space delimited tokens + string segment(const string&); ///< \brief Segment, save (in \p sent), and return newline delimited sentences + + string save_string(const string&); // save a buffer + string tokenize_nosave(const string&); // tokenize without saving + string tokenize(const string&,int); // do tokenization with or without inserting spaces between them + + void print(int); ///< \brief Print tokens/tags with given verbosity + + vector word; ///< \brief Vector of words (tokens) of most recently tagged (or tokenized) text + vector tag; ///< \brief Vector of tags of most recently tagged (or tokenized) text + vector sent; ///< \brief Vector of sentences of most recently sentence-segmented text + + char *text; // Input text arg + int text_len; // It's length + int *tokflag; // token flags + int *endflag; // end-sentence flags + + string option_cnam; // A suffix, for opening variant support files + string option_dir; // Directory to find things + +protected: + + void set_tokflag(); + void set_endflag(); + void set_endflag_01(); + int size_buff(); + + void init_pair(const string& file_name); // read a file of common pairs + void init_abbr(const string& file_name); // read a file of abbreviations + + void tok_0(); + void tok_1(); + void tok_2(); + void tok_3(); + void tok_5_6_7(); + void tok_8_9(); + void tok_10(); + void tok_11(); + void tok_12(); + void tok_13(); + void tok_14(); + void tok_15(); + void tok_15_1(); + void tok_16(); + void tok_16_1(); + void tok_17(); + void tok_20(); + void tok_20_1(); + void tok_20_2(); + void tok_21(); + void tok_21a(); + void tok_22(); + void tok_23(); + void tok_24(); + void tok_25(); + void tok_26(); + void tok_27(); + void tok_28(); + void tok_29(); + void tok_29a(); + void tok_30(); + void tok_31(); + void tok_32(); + void tok_33(); + int complex_check(); + void map_escapes(); + void tok_un(); + + void append_token(string&, int&, char*, int); + string token_string(); + + set common_pair; + map common_abbr; + +private: + int option_token; // Output tokenized text (only use internally) + int tok_initialized; // is it inited? +}; + +#endif + diff --git a/Library/Makefile b/Library/Makefile index 7f5d5441dae38c4eebc9f0ff3d03caa4bbbe9fac..bdfd921f52da5b10de80720f8b25f49bd758eadb 100644 --- a/Library/Makefile +++ b/Library/Makefile @@ -1,13 +1,13 @@ -SRC_DIR=./ -TRASHFILES = *.o *~ *.bak core -LIB_INC=-I./ -#.KEEP_STATE: -libops.a: runn.o Btree.o FBase.o Hash.o MPtok.o \ - AbbrStra.o AbbrvE.o Ab3P.o - ar rus $@ $? -OS=-g -%.o: $(SRC_DIR)/%.C - g++ -c $(OS) $< -o $@ $(LIB_INC) - -clean: rm -f $(TRASHFILES) - +SRC_DIR=./ +TRASHFILES = *.o *~ *.bak core +LIB_INC=-I./ +#.KEEP_STATE: +libops.a: runn.o Btree.o FBase.o Hash.o MPtok.o \ + AbbrStra.o AbbrvE.o Ab3P.o + ar rus $@ $? +OS=-g +%.o: $(SRC_DIR)/%.C + g++ -c $(OS) $< -o $@ $(LIB_INC) + +clean: rm -f $(TRASHFILES) + diff --git a/Library/WordData/Ab3P_prec.dat b/Library/WordData/Ab3P_prec.dat index 2c8e0ce9469f31f63558efd3751dd7aa45668304..d613993c8b4b3d00ed3638b68a00c149313188b3 100644 --- a/Library/WordData/Ab3P_prec.dat +++ b/Library/WordData/Ab3P_prec.dat @@ -1,145 +1,145 @@ -Al 1 FirstLetOneChSF 0.967224 -Al 2 FirstLet 0.99818 -Al 2 FirstLetGen 0.994292 -Al 2 WithinWrdFWrd 0.989054 -Al 2 FirstLetGenStp 0.970019 -Al 2 ContLet 0.96935 -Al 2 WithinWrdFLet 0.941981 -Al 2 FirstLetGenSkp 0.949988 -Al 2 WithinWrdFWrdSkp 0.947364 -Al 2 ContLetSkp 0.877216 -Al 2 WithinWrdWrd 0.74768 -Al 2 WithinWrdFLetSkp 0.640805 -Num 2 ContLet 0.975372 -Num 2 ContLetSkp 0.96617 -Num 2 WithinWrdFWrdSkp 0.988426 -Num 2 FirstLetGen2 0.909995 -Num 2 FirstLetGenStp 0.856401 -Num 2 FirstLetGenSkp 0.858132 -Num 2 WithinWrdFWrd 0.726155 -Num 2 WithinWrdFLetSkp 0.607829 -Num 2 WithinWrdFLet 0.493922 -Spec 2 FirstLetGen2 0.854368 -Spec 2 FirstLetGenStp 0.664622 -Spec 2 FirstLetGenSkp 0.657475 -Al 3 FirstLet 0.999808 -Al 3 FirstLetGen 0.999408 -Al 3 FirstLetGenS 0.998732 -Al 3 WithinWrdFWrd 0.997824 -Al 3 FirstLetGenStp 0.997839 -Al 3 FirstLetGenStp2 0.997264 -Al 3 FirstLetGenSkp 0.988583 -Al 3 ContLet 0.987697 -Al 3 WithinWrdFWrdSkp 0.981107 -Al 3 WithinWrdFLet 0.981322 -Al 3 ContLetSkp 0.968185 -Al 3 WithinWrdWrd 0.9437 -Al 3 WithinWrdFLetSkp 0.904799 -Al 3 WithinWrdLet 0.663735 -Al 3 AnyLet 0.303503 -Num 3 FirstLetGen2 0.998497 -Num 3 WithinWrdFWrd 0.99964 -Num 3 FirstLetGenStp 0.998807 -Num 3 FirstLetGenStp2 0.991256 -Num 3 FirstLetGenSkp 0.991202 -Num 3 ContLet 0.996938 -Num 3 WithinWrdFWrdSkp 0.998821 -Num 3 WithinWrdFLet 0.985676 -Num 3 ContLetSkp 0.995076 -Num 3 WithinWrdWrd 0.999245 -Num 3 WithinWrdFLetSkp 0.971123 -Num 3 WithinWrdLet 0.819989 -Num 3 AnyLet 0.797932 -Spec 3 FirstLetGen2 0.978311 -Spec 3 FirstLetGenStp 0.977779 -Spec 3 FirstLetGenStp2 0.929197 -Spec 3 WithinWrdFWrd 0.930654 -Spec 3 ContLet 0.923911 -Spec 3 FirstLetGenSkp 0.904086 -Spec 3 WithinWrdFWrdSkp 0.893989 -Spec 3 ContLetSkp 0.851583 -Spec 3 WithinWrdFLet 0.712331 -Spec 3 WithinWrdFLetSkp 0.64667 -Spec 3 WithinWrdWrd 0.428 -Al 4 FirstLet 0.999964 -Al 4 FirstLetGen 0.99993 -Al 4 FirstLetGenS 0.999811 -Al 4 WithinWrdFWrd 0.999616 -Al 4 FirstLetGenStp 0.999868 -Al 4 FirstLetGenStp2 0.999948 -Al 4 FirstLetGenSkp 0.998534 -Al 4 ContLet 0.992792 -Al 4 WithinWrdFWrdSkp 0.997097 -Al 4 WithinWrdFLet 0.992955 -Al 4 ContLetSkp 0.985568 -Al 4 WithinWrdWrd 0.995823 -Al 4 WithinWrdFLetSkp 0.976873 -Al 4 WithinWrdLet 0.917863 -Al 4 AnyLet 0.696532 -Num 4 FirstLetGen2 0.99992 -Num 4 WithinWrdFWrd 0.999835 -Num 4 FirstLetGenStp 0.999903 -Num 4 FirstLetGenStp2 0.999936 -Num 4 FirstLetGenSkp 0.999577 -Num 4 ContLet 0.999555 -Num 4 WithinWrdFWrdSkp 0.999885 -Num 4 WithinWrdFLet 0.9975 -Num 4 ContLetSkp 0.998578 -Num 4 WithinWrdWrd 0.997703 -Num 4 WithinWrdFLetSkp 0.996501 -Num 4 WithinWrdLet 0.986326 -Num 4 AnyLet 0.953126 -Spec 4 FirstLetGen2 0.99278 -Spec 4 FirstLetGenStp 0.98597 -Spec 4 FirstLetGenStp2 0.982127 -Spec 4 WithinWrdFWrd 0.997649 -Spec 4 ContLet 0.980869 -Spec 4 FirstLetGenSkp 0.944843 -Spec 4 WithinWrdFWrdSkp 0.985685 -Spec 4 ContLetSkp 0.973983 -Spec 4 WithinWrdFLet 0.992773 -Spec 4 WithinWrdFLetSkp 0.863247 -Spec 4 WithinWrdWrd 0.931745 -Spec 4 WithinWrdLet 0.418068 -Spec 4 AnyLet 0.223562 -Al 5 FirstLet 0.999979 -Al 5 FirstLetGen 0.999979 -Al 5 FirstLetGenS 0.999913 -Al 5 WithinWrdFWrd 0.999928 -Al 5 FirstLetGenStp 0.999989 -Al 5 FirstLetGenStp2 0.999887 -Al 5 FirstLetGenSkp 0.999852 -Al 5 ContLet 0.997596 -Al 5 WithinWrdFWrdSkp 0.999602 -Al 5 WithinWrdFLet 0.997473 -Al 5 ContLetSkp 0.989703 -Al 5 WithinWrdWrd 0.999812 -Al 5 WithinWrdFLetSkp 0.986066 -Al 5 WithinWrdLet 0.889324 -Al 5 AnyLet 0.73859 -Num 5 FirstLetGen2 0.999987 -Num 5 WithinWrdFWrd 0.999922 -Num 5 FirstLetGenStp 0.99998 -Num 5 FirstLetGenStp2 1 -Num 5 FirstLetGenSkp 0.999901 -Num 5 ContLet 0.999613 -Num 5 WithinWrdFWrdSkp 0.999937 -Num 5 WithinWrdFLet 0.999386 -Num 5 ContLetSkp 0.999312 -Num 5 WithinWrdWrd 1 -Num 5 WithinWrdFLetSkp 0.998939 -Num 5 WithinWrdLet 0.996068 -Num 5 AnyLet 0.986193 -Spec 5 FirstLetGen2 0.999701 -Spec 5 FirstLetGenStp 0.9999 -Spec 5 FirstLetGenStp2 0.999757 -Spec 5 WithinWrdFWrd 0.999517 -Spec 5 ContLet 0.994648 -Spec 5 FirstLetGenSkp 0.997065 -Spec 5 WithinWrdFWrdSkp 0.998513 -Spec 5 ContLetSkp 0.992445 -Spec 5 WithinWrdFLet 0.996623 -Spec 5 WithinWrdFLetSkp 0.978026 -Spec 5 WithinWrdWrd 0.996879 -Spec 5 WithinWrdLet 0.862993 +Al 1 FirstLetOneChSF 0.967224 +Al 2 FirstLet 0.99818 +Al 2 FirstLetGen 0.994292 +Al 2 WithinWrdFWrd 0.989054 +Al 2 FirstLetGenStp 0.970019 +Al 2 ContLet 0.96935 +Al 2 WithinWrdFLet 0.941981 +Al 2 FirstLetGenSkp 0.949988 +Al 2 WithinWrdFWrdSkp 0.947364 +Al 2 ContLetSkp 0.877216 +Al 2 WithinWrdWrd 0.74768 +Al 2 WithinWrdFLetSkp 0.640805 +Num 2 ContLet 0.975372 +Num 2 ContLetSkp 0.96617 +Num 2 WithinWrdFWrdSkp 0.988426 +Num 2 FirstLetGen2 0.909995 +Num 2 FirstLetGenStp 0.856401 +Num 2 FirstLetGenSkp 0.858132 +Num 2 WithinWrdFWrd 0.726155 +Num 2 WithinWrdFLetSkp 0.607829 +Num 2 WithinWrdFLet 0.493922 +Spec 2 FirstLetGen2 0.854368 +Spec 2 FirstLetGenStp 0.664622 +Spec 2 FirstLetGenSkp 0.657475 +Al 3 FirstLet 0.999808 +Al 3 FirstLetGen 0.999408 +Al 3 FirstLetGenS 0.998732 +Al 3 WithinWrdFWrd 0.997824 +Al 3 FirstLetGenStp 0.997839 +Al 3 FirstLetGenStp2 0.997264 +Al 3 FirstLetGenSkp 0.988583 +Al 3 ContLet 0.987697 +Al 3 WithinWrdFWrdSkp 0.981107 +Al 3 WithinWrdFLet 0.981322 +Al 3 ContLetSkp 0.968185 +Al 3 WithinWrdWrd 0.9437 +Al 3 WithinWrdFLetSkp 0.904799 +Al 3 WithinWrdLet 0.663735 +Al 3 AnyLet 0.303503 +Num 3 FirstLetGen2 0.998497 +Num 3 WithinWrdFWrd 0.99964 +Num 3 FirstLetGenStp 0.998807 +Num 3 FirstLetGenStp2 0.991256 +Num 3 FirstLetGenSkp 0.991202 +Num 3 ContLet 0.996938 +Num 3 WithinWrdFWrdSkp 0.998821 +Num 3 WithinWrdFLet 0.985676 +Num 3 ContLetSkp 0.995076 +Num 3 WithinWrdWrd 0.999245 +Num 3 WithinWrdFLetSkp 0.971123 +Num 3 WithinWrdLet 0.819989 +Num 3 AnyLet 0.797932 +Spec 3 FirstLetGen2 0.978311 +Spec 3 FirstLetGenStp 0.977779 +Spec 3 FirstLetGenStp2 0.929197 +Spec 3 WithinWrdFWrd 0.930654 +Spec 3 ContLet 0.923911 +Spec 3 FirstLetGenSkp 0.904086 +Spec 3 WithinWrdFWrdSkp 0.893989 +Spec 3 ContLetSkp 0.851583 +Spec 3 WithinWrdFLet 0.712331 +Spec 3 WithinWrdFLetSkp 0.64667 +Spec 3 WithinWrdWrd 0.428 +Al 4 FirstLet 0.999964 +Al 4 FirstLetGen 0.99993 +Al 4 FirstLetGenS 0.999811 +Al 4 WithinWrdFWrd 0.999616 +Al 4 FirstLetGenStp 0.999868 +Al 4 FirstLetGenStp2 0.999948 +Al 4 FirstLetGenSkp 0.998534 +Al 4 ContLet 0.992792 +Al 4 WithinWrdFWrdSkp 0.997097 +Al 4 WithinWrdFLet 0.992955 +Al 4 ContLetSkp 0.985568 +Al 4 WithinWrdWrd 0.995823 +Al 4 WithinWrdFLetSkp 0.976873 +Al 4 WithinWrdLet 0.917863 +Al 4 AnyLet 0.696532 +Num 4 FirstLetGen2 0.99992 +Num 4 WithinWrdFWrd 0.999835 +Num 4 FirstLetGenStp 0.999903 +Num 4 FirstLetGenStp2 0.999936 +Num 4 FirstLetGenSkp 0.999577 +Num 4 ContLet 0.999555 +Num 4 WithinWrdFWrdSkp 0.999885 +Num 4 WithinWrdFLet 0.9975 +Num 4 ContLetSkp 0.998578 +Num 4 WithinWrdWrd 0.997703 +Num 4 WithinWrdFLetSkp 0.996501 +Num 4 WithinWrdLet 0.986326 +Num 4 AnyLet 0.953126 +Spec 4 FirstLetGen2 0.99278 +Spec 4 FirstLetGenStp 0.98597 +Spec 4 FirstLetGenStp2 0.982127 +Spec 4 WithinWrdFWrd 0.997649 +Spec 4 ContLet 0.980869 +Spec 4 FirstLetGenSkp 0.944843 +Spec 4 WithinWrdFWrdSkp 0.985685 +Spec 4 ContLetSkp 0.973983 +Spec 4 WithinWrdFLet 0.992773 +Spec 4 WithinWrdFLetSkp 0.863247 +Spec 4 WithinWrdWrd 0.931745 +Spec 4 WithinWrdLet 0.418068 +Spec 4 AnyLet 0.223562 +Al 5 FirstLet 0.999979 +Al 5 FirstLetGen 0.999979 +Al 5 FirstLetGenS 0.999913 +Al 5 WithinWrdFWrd 0.999928 +Al 5 FirstLetGenStp 0.999989 +Al 5 FirstLetGenStp2 0.999887 +Al 5 FirstLetGenSkp 0.999852 +Al 5 ContLet 0.997596 +Al 5 WithinWrdFWrdSkp 0.999602 +Al 5 WithinWrdFLet 0.997473 +Al 5 ContLetSkp 0.989703 +Al 5 WithinWrdWrd 0.999812 +Al 5 WithinWrdFLetSkp 0.986066 +Al 5 WithinWrdLet 0.889324 +Al 5 AnyLet 0.73859 +Num 5 FirstLetGen2 0.999987 +Num 5 WithinWrdFWrd 0.999922 +Num 5 FirstLetGenStp 0.99998 +Num 5 FirstLetGenStp2 1 +Num 5 FirstLetGenSkp 0.999901 +Num 5 ContLet 0.999613 +Num 5 WithinWrdFWrdSkp 0.999937 +Num 5 WithinWrdFLet 0.999386 +Num 5 ContLetSkp 0.999312 +Num 5 WithinWrdWrd 1 +Num 5 WithinWrdFLetSkp 0.998939 +Num 5 WithinWrdLet 0.996068 +Num 5 AnyLet 0.986193 +Spec 5 FirstLetGen2 0.999701 +Spec 5 FirstLetGenStp 0.9999 +Spec 5 FirstLetGenStp2 0.999757 +Spec 5 WithinWrdFWrd 0.999517 +Spec 5 ContLet 0.994648 +Spec 5 FirstLetGenSkp 0.997065 +Spec 5 WithinWrdFWrdSkp 0.998513 +Spec 5 ContLetSkp 0.992445 +Spec 5 WithinWrdFLet 0.996623 +Spec 5 WithinWrdFLetSkp 0.978026 +Spec 5 WithinWrdWrd 0.996879 +Spec 5 WithinWrdLet 0.862993 Spec 5 AnyLet 0.745608 \ No newline at end of file diff --git a/Library/WordData/Lf1chSf b/Library/WordData/Lf1chSf index ffd6b08b8da685479ad4435a6fcb5416cbe21bb0..181064bf4db27035186d9349ef2395dceda6f8d5 100644 --- a/Library/WordData/Lf1chSf +++ b/Library/WordData/Lf1chSf @@ -1,4991 +1,4991 @@ -a -a-wave -a/j -abdominal -abnormal -abortus -abscesses -absence -absent -absolute -absorbance -absorbency -absorptance -absorption -acarbose -acceleration -acceptor -acceptors -accessory -accumulation -accuracy -acebutolol -acetaldehyde -acetamide -acetaminophen -acetate -acetazolamide -acetoacetylation -acetone -acetonitrile -acetylcholine -acid -acidic -acids -acini -aconitine -acquired -acquisition -acrocentric -acromegaly -act -actin -action -action: -activated -activation -activator -active -activities -activitrax -activity -activity: -actuator -acupuncture -acute -addition -additive -addressed: -adductor -adducts -adenine -adenines -adenoidectomy -adenoma -adenomas -adenosine -adenylate -adenylation -adequate -adherent -adipocyte -admission -adrenalectomy -adrenalin -adrenaline -adrenaline-containing -adrenaline-storing -adrenergic -adriamycin -adult -adults -advanced -advantages: -adventurous -aerobacter -aerobic -aeromonas -affected -affective -afferent -african -africans -after -afternoon -age -aged -aggregated -aggression -aggressive -agonist -agonists -agouti -agreeableness -aims: -air -airways -akimbo -al(2)o(3) -ala -alanine -albumin -albuterol -alcohol -alcoholic -alcoholism -aldolase -aldosterone -alert -alfentanil -all-adenine -allele -alleles -allergic -allergy -allopurinol -alloxan -almitrine -alone -along -alopecia -alpha -alpha-methyldopa -alprazolam -alternating -altitude -alveolar -always -ambulatory -amenorrheic -american -amifostine -amikacin -amiloride -aminoacyl -aminoacyl-trna -aminophylline -amiodarone -amitriptyline -amlodipine -amniotomy -amodiaquine -amorphous -amount -amounts -amoxicillin -amoxycillin -amperes -amphetamine -ampicillin -amplified -amplitude -amplitudes -ampulla -ampullary -amrinone -amygdala -amylase -amyloid -amyloidosis -an -anaemia -analgesia -analogue -analysis -analysis, -analysis: -analyzed: -and -and/or -androgen -androgens -andromonoecious -androstendione -androstenedione -androsterone -anemic -anergic -anesthesia -anesthetized -anestrous -anestrus -aneuploid -aneuploidy -angina -angiography -angiotensin -angle -angstrom -angstroms -angus -aniline -animal -animals -anionic -anions -aniridia -ankyloglossia -annulus -anomalies -anomaly -anorexia -anoxia -antagonist -anterior -anteriorly -antheraxanthin -anthracene -anthracycline -anthracyclines -anti -anti-lewis -antibiotic -antibodies -antibody -antigen -antipyrine -antrectomy -antrum -anxiety -aorta -aortic -apex -apical -apnea -apneas -apo -apolipoprotein -apolipoproteins -apomorphine -apoprotein -apoptosis -approaches: -aprepitant -aprotinin -aqueous -arabinoside -arachidonate -arches -are -are: -area -areas -areas: -argas -arginase -arginine -arm -arms -arrhythmia -arterial -arteries -arterioles -artery -arthritic -as -as: -ascites -asians -asked: -aspects: -aspirin -assimilation -associative -assumptions: -asthma -asthmatic -asthmatics -astrocytes -astrocytoma -astrocytomas -asymmetric -asymmetrical -asymmetry -asymptomatic -asymptote -asymptotic -at -ataxia -atenolol -atherogenic -athletes -atomic -atopic -atorvastatin -atracurium -atrazine -atresia -atretic -atria -atrial -atrium -atropine -attenuated -attenuation -atypical -aubert -auditory -auditory-alone -auditory-only -austria -austrian -autism -autoclaved -autologous -automatic -autonomy -autumn -auxotype -available -average -avian -avidin -avirulent -avoidant -axial -axillary -axis -azathioprine -azithromycin -b -b.i.d. -b/beijing/184/93 -b4 -baboon -background -backward -bacteremia -bacteremic -bacteria -bacterial -bacteriochlorophyll -bacteriochlorophylls -bacteroides -baicalein -bakumondo-to -balance -balanced -balb/cj -balloon -band -banded -bar -barbital -barley -barrier -barring -basal -basalis -base -baseline -bases -basic -basicity -basolateral -basophils -bath -bathorhodopsin -bcnu -beads -beans -bearing -beats -becke -beclomethasone -beef -before -belgium -belgrade -beliefs -benazepril -bendamustine -bending -benfluorex -benfluron -benign -benignity -bentonite -benzamide -benzene -benzo -benzoate -benzylamide -bepridil -bermudagrass -berodual -beta -betamethasone -betaxolol -bezafibrate -bi- -biarra -bias -bicarbonate -bicycle -bilateral -bilirubin -binding -binocular -bioaccumulation -bioactive -bioactivity -bioassay -biofeedback -biological -biomass -biopsies -biopsy -biopterin -biotin -biotypes -biphasic -biphenyl -biphenylene -biphenyls -birth -bisexual -bisoprolol -bivalent -black -blackbelly -blacks -bladder -blank -bleeding -bleomycin -block -blocked -blocking -blood -blue -blunt -body -bolus -bombesin -bonding -bone -bone-marrow-derived -borderline -boron -bosentan -bottom -bottom-component -bound -bouton -bovine -boys -brace -bradykinin -brahman -brain -bran -brassica -breakdown -breakfast -breast -breast-fed -breathiness -breathlessness -bregma -brightness -brodimoprim -broiler -bromocriptine -bronchial -bronchitis -broth -brown -broxaterol -brushing -buccal -budesonide -buf/mna -buffer -bulb -bulimia -bulinus -bumetanide -bupivacaine -buprenorphine -burn -burned -bursa-equivalent -bursal -bursal-derived -bursting -buserelin -butanol -butorphanol -butter -butyl -c -c(5)h(8), -c(6)h(10), -c-peptide -c-terminal -c57bl/6j -ca2+. -caerulein -caffeine -calcitonin -calcitriol -calculus -calorie -calves -calyx -canadian -cancer -candesartan -candida -candidate -canine -capacitance -capacitor -capacity -capecitabine -capillaries -capillary -caprine -capsaicin -capsid -captopril -carbachol -carbamate -carbamazepine -carbaryl -carbohydrate -carbohydrate-rich -carbohydrates -carbomer -carbon -carboplatin -carboxy -carboxy- -carboxy-terminal -carboxyl -carboxyl-terminal -carboxylated -carcass -carcinoma -cardiac -cardiac-lethal -cardiologists -cardioversion -care -carminomycin -carnitine -carotenoid -carotid -carrageenan -carvedilol -case -casein -cases -cast -castrated -castration -casual -catabolism -catalase -catalyst -catalytic -cataract -catechin -catechol -catecholamines -catholic -cation -cationic -cations -cattle -caucasian -caucasians -caudal -caudata -caudate -cavity -cecum -cefotaxime -ceftazidime -ceftriaxone -cefuroxime -celecoxib -celiprolol -cell -cellobiase -cells -cells; -cellular -cellulose -cellulosic -celsius -cemented -cementum -cent -center -centigrade -central -centrilobular -centroid -centromeric -cephalothin -cerebellum -cerebrum -cerivastatin -cervical -cervix -cesarean -cetirizine -ch -chain -chains -chance -charolais -cheese -chemical -chemically-cured -chemiluminescence -chemotherapy -cherry -chicken -child -children -chimaeric -chimeric -chinese -chitosan -chitose -chlamydia -chloramphenicol -chlorhexidine -chlorination -chloroform -chloroquine -chlorpromazine -chlorthalidone -choice -cholate -cholecystectomy -cholesterol -cholesterols -cholestyramine -cholic -chow -chroma -chronaxie -chronic -chymase -cibenzoline -cicletanine -cilazapril -ciliated -cimetidine -ciprofloxacin -circular -circulating -circumference -circumferences -circumferential -cirrhosis -cis -cisatracurium -cisplatin -cisplatinum -cisternal -citrate -clamping -clarithromycin -class -classes -classic -classical -clearance -clearances -clindamycin -clinic -clinical -clockwise -clodronate -clomipramine -clonidine -clopidogrel -closed -closed-spiracle -clover -clozapine -cluster -clustered -clusters -cm -cm. -cm/m2 -cm3 -coactivation -coaptation -cobb -cocaine -codeine -coefficient -coefficients -coherence -cohesion -cohort -coil -colchicine -cold -cold-sensitive -collagen -collagenase -colon -colonic -colostrum -colour -columbia -columbinate -combination -combined -combustion -comfort -comfortable -common -community -compact -comparison -compensation -competence -competition -competitive -complaints -complement -complement- -complementary -complete -completeness -complex -complexed -compliance -compliances -compliant -complications -component -components -composite -compost -compounds -compression -compton -concentrate -concentration -concentrations -concise -concordance -concrete -concurrent -condensation -condensed -condition -conditioned -conditioning -conditions -conductance -cone -confederate -configuration -configurations -confined -confinement -confusion -conjugated -connecting -connection -conscientiousness -conserved -consonant -consonants -constant -constant-region -constants -constitutive -consumers -consumption -contact -contemplation -content -context -contingency -continuity -continuous -continuously -contractions -contralateral -contrast -contrasts -control -controlled -controller -controls -convection -conventional -convergent -conversion -cooh-terminal -cooling -cooperate -cooperation -cooperativeness -coopworth -copulating -copyright -cord -core -cores -corn -corneal -cornstarch -coronal -coronary -corrected -correlation -cortex -cortical -corticosteroid -corticosterone -cortisol -corynebacterium -cost -costs -cotton -coulomb -coulombs -coumarin -couplets -course -cream -creatine -creatinine -criteria -criterion -cryia -csf -cubic -culex -culture -cultures -cuprophan -curantyl -curcumin -cured -curvilinear -cutaneous -cyclase -cycle -cycles -cyclic -cycling -cyclists -cycloheximide -cyclohydrolase -cyclophosphamide -cylinders -cys -cysteine -cysts -cytidine -cytochrome -cytology -cytoplasm -cytoplasmatic -cytoplasmic -cytosine -cytosines -cytosol -cytosolic -d -d'man -d-loop -d-tga -dacarbazine -dai-saiko-to -daidzein -daily -dalton -daltons -dam -danazol -dantrolene -dark -darkness -darkschewitsch -daunomycin -daunorubicin -day -days -daytime -db1 -dba/2j -dbd-pz-nh2 -de-activated -dead -deafness -death -deaths -debrisoquin -debrisoquine -decapitated -decayed -decidua -decrease -decreases -decreasing -deep -defect -defection -defective -defibrotide -deficiency -deficient -deficit -deficits -defined -definite -deflation -degenerated -degradation -degrees -dehydrated -dehydration -dehydroepiandrosterone -dehydrogenase -deiodinase -delay -delayed -deletion -delirium -delivery -delta -deltamethrin -deltoides -demented -dementia -denatured -denervated -denervation -denmark -dense -densities -density -dentavax -dentin -deoxy -depolarizing -deposition -deprenyl -depressed -depression -deprivation -deprived -depth -dermatographism -dermis -dermographism -dermorphin -descending -desflurane -desiccation-selected -desmin -desmopressin -desmosine -desmosome -desogestrel -desynchronized -detail -detection -detergent -determinant -deurenberg -deuterated -deuterium -development -deviation -dexamethasone -dextran -dextro -dextromethorphan -dextropropoxyphene -dextrorotatory -dextrose -dhofari -diabetes -diabetic -diabetics -diagnosis -dialysance -dialysate -dialysis -dialyzable -dialyzed -diameter -diameters -diamond -diaphorase -diaphragm -diarrhea -diastole -diastolic -diazepam -diazinon -diazoxide -dibekacin -dibenzyline -dichloromethane -dichlorvos -diclofenac -died -diestrous -diestrus -diet -diet, -difference -differences -differentiated -diffuse -diffusion -diffusivities -diffusivity -diflunisal -digestibility -digital -digits -digora -digoxin -dihydrotestosterone -dihydrouridine -dilantin -diltiazem -diluent -dilute -dilution -dimension -dimensional -dimensions -dimer -dimeric -dimers -dimethoate -dimorphic -diopter -diopters -dioptre -dioptres -diphenhydramine -diphtheria -diploid -dipolar -dipyridamole -direct -directional -directly -disability -disc -discomfort -discontinuous -discrepancy -discriminability -discrimination -disease -disequilibrium -disgnathous -disinhibition -disobutamide -disopyramide -disorganized -dispersalloy -dispersed -dispersion -displacement -dissimilarity -dissipation -dissociative -distal -distance -distances -distensibility -distraction -distribution -disulfiram -dithranol -diuretic -diuretics -diurnal -divergence -divergent -diversity -division -dna -dobutamine -doca -docetaxel -dog -dolichol -domain -domains -dominance -dominant -donor -donors -dopamine -dopexamine -doppler -dormant -dorsal -dorsalis -dorset -dose -doses -double -down -doxazosin -doxorubicin -doxycycline -dp -drag -drainage -draining -dried -drink -drinkers -drive -driver -drivers -droperidol -drosophila -drowsiness -drug -drugs -dry -dual-marked -duarte -duodenum -duration -durations -during -duroc -dutasteride -dwarf -dye -dyes -dynamic -dyract -dysfunction -dysplasia -dyspnea -dysthymia -dystrophic -e -e(2) -e(t) -e-wave -early -east -eastern -ebony -eccentricity -ecdysone -ecgonine -echo -echocardiogram -echocardiographic -echocardiography -echovirus -eclampsia -ecori -ectostriatum -eczema -edatrexate -edema -edta -education -effect -effective -effectiveness -effector -effectors -effects -efficacies -efficacy -efficiencies -efficiency -efflux -effort -egg -eggen -eicosanoids -either -ejaculation -ejection -elastance -elastase -elastic -elasticity -elastin -elderly -elective -electric -electrical -electrolyte -electrolytes -electron -electronic -eledoisin -element -elements -elevated -elevation -elimination -elite -ellipticine -elliptocytosis -elongation -ema -embolic -embryo -embryogenic -embryonal -embryonic -emission -emotional -emotionality -emphysema -empty -enalapril -enamel -enantioenriched -enantiomeric -enantiopure -enantioselectivity -encainide -encephalitis -encoding -end -endocrinopathy -endometriosis -endometrium -endosulfan -endothelial -endothelin -endothelioid -endothelium -endotoxin -endralazine -endurance -endurance-only -endurance-trained -energies -energy -enflurane -english -enhancement -enhancer -enkephalin -enoxaparin -enoximone -enprostil -enriched -enrichment -entacapone, -enterobacteriaceae -enterobactin -entity -entropy -entry -envelop -envelope -environment -environmental -environments -enzyme -eosinophil -eosinophils -ependymomas -ephedrine -epidemic -epidermal -epidermis-type -epididymal -epididymis -epidoxorubicin -epidural -epilepsy -epimerase -epimerization -epinephrine -epiphysis -epirubicin -epitestosterone -epithelial -epithelioid -epithelium -epq-r -epsilometer -epsilon -equatorial -equine -equipment -equivalents -erect -ergometry -ergotamine -erosive -error -errors -erysipelas -erythema -erythroagglutinating -erythrocyte -erythrocytes -erythrocytic -erythroid -erythromycin -erythropoiesis -erythropoietic -erythropoietin -escharectomy -escherichia -esmolol -esophageal -esophagitis -essential -ester -esterified -estimates -estimation -estradiol -estradiol-17beta -estrodiol -estrogen -estrogen-treated -estrogenic -estrogenized -estrogens -estrone -estrous -estrus -etched -ethambutol -ethanol -ethanol-containing -ethanol-exposed -ethanol-fed -ethanolamine -ether -ethidium -ethmozine -ethyl -ethylbenzene -ethylene -etidocaine -etidronate -etilefrine -etintidine -etiocholanolone -etoh -etomidate -etoposide -euchromatic -eudragit -eugenol -eugenol, -euglycemia -euglycemic -european -europeans -euthanasia -euthyroid -evaluative -evaporation -evening -evenness -evoked -exacerbations -exam -examined, -excellent -excimer -excitation -excitatory -excited -exclusive -exclusively -excretion -execution -exercise -exercise-trained -exercised -exfoliative -exhaustion -exit -exoplasmic -expectancy -expected -expenditure -experienced -experimental -experimenter -experiments -expiration -expiratory -exponential -exposed -exposure -exposures -expressed -expressiveness -extended -extensibility -extension -extensive -extensor -extent -external -extinction -extracellular -extract -extracted -extraction -extractions -extracts -extranodal -extraversion -extraversion-introversion -extroversion -eye -f -f/sigma -f2000 -face -facial -facilitating -factor -factor" -factors -factors; -failing -failure -failures -fair -fall -false -familial -familiar -famotidine -farnesol -fast -fast-twitch -fasted -fasting -fat -fat-rich -fatal -father -fatigue -favourable -feathers -fecal -feces -fecundity -fed -feeding -felodipine -female -females -feminine -femininity -feminization -femoral -femur -fenfluramine -fenofibrate -fenoldopam -fenoterol -fentanyl -fermented -ferning -ferritin -ferromagnetic -ferryl -fertility -fertilization -fertilized -fertilizer -fetal -fetus -fetuses -fever -fi- -fiber -fibers -fibre -fibres -fibrin -fibrinogen -fibroblast -fibroblast-like -fibroblastic -fibroblasts -fibronectin -fibrosis -field -filamentous -filaments -filled -fillets -film -filtering -filtration -fimbrial -final -finally -finally, -finasteride -finger -finger-like -finishers -finnsheep -first -fischer -fish -fistula -fistulotomy -fit -fitness -flagellar -flat -flattened -flavonoids -flaxseed -flecainide -flexed-tail -flexical -flexion -flexor -flight -floating -flobufen -flow -flow-rate -flowers -flows -fluconazole -fluctuation -fludarabine -fludrocortisone -fluid -flumazenil -flunisolide -flunitrazepam -flunixin -fluorescein -fluorescence -fluorescent -fluoridated -fluoride -fluoride-resistant -fluorides -fluorine -fluoro -fluorophore -fluorouracil -fluoxetine -flurbiprofen -fluroshield -flutamide -flutter -fluttering-spiracle -fluvastatin -flux -foam -foetal -foil -folacin-deficient -folate -folded -follicles -follicular -for -force -forces -forget -forked -form -formaldehyde -formaldehyde-treated -formalin -formalin-fixed -formamide -formoterol -formula -formulation -fornix -forskolin -forssman -forward -fosinopril -fotemustine -foveal -fpd -fraction -fractionated -fractions -fracture -fragment -frame -frames -france -francs -free -freeze-branded -french -frequencies -frequency -frequent -frequently/always -fresh -freundlich -friesian -frizzle -front -frontal -fructosamine -fructose -fruit -frusemide -fsh -full -function -function, -functional -functioning -fundamental -fundus -furanose -furazolidone -furosemide -fusion -fusogenic -g -g-csf -g-protein -g-proteins -g/ml -gain -galactomannan -galactose -gallopamil -gamma -gaps -garlic -garnet -gas -gaseous -gastric -gastrin -gastrin-expressing -gastrin-producing -gastrocnemius -gastrointestinal -gastroschisis -gastrostomy -gastrozepin -gauche -gauge -gauss -gaussian -gauze -gavage -gel -gelatin -gelbvieh -gelsolin -gemcitabine -gemfibrozil -gender -gene -general -generalizability -generation -generations -genes -genetic -genistein -genital -genogroup -genomic -genotoxic -genotype -gentamicin -gentamycin -gentisate -gerbil -german -germany -germinating -germination -germline -gestation -gestodene -giemsa -gingivitis -ginsenosides -girls -girth -glandular -glass -glaucoma -gliadin -glial -glibenclamide -globular -globulin -glomerular -glomeruli -glu27 -glucagon -glucocorticoid -glucocorticoids -gluconate -glucosamine -glucose -glucosyl -glucuronide -glucuronides -glutamate -glutaraldehyde -glutathione -gly -gly16 -glyburide -glycated -glycemia -glycerol -glycine -glyco- -glycogen -glycogen-binding -glycoprotein -glycoproteins -glycopyrrolate -glycosylated -glycyrrhizin -glyoxal -gold -golgi -good -gossypol -grade -gradient -gradients -grading -graft -grain -gram -grams -granisetron -granular -granules -granulocyte -granulocytes -granulocytic -granulosa -grass -gravimetric -gravitational -gravity -grazing -greece -green -grey -grip -grooming -gross -ground -group -groups -groups; -grower -growing -growth -gtp-binding -gtpase -guaiacyl -guanethidine -guanfacine -guanidine -guanidinium -guanine -guanine-binding -guanine-nucleotide-binding -guanines -guanosine -guanosines -guanylate -guide -guluronic -gum -gvhd -gymnasts -gyroid -h -h2o2 -habituation -haemagglutinin -haematocrit -haemoglobin -haemolysis -hair -hairless -hairy -half-life -half-lives -halofantrine -halofuginone -haloperidol -halothane -hampshire -hamster -hamstring -hamstrings -hancock -handled -handling -haplotype -haptoglobin -harasima -hardened -hardness -harlan -harman -harmonic -harnesses -hartley -harvey -hatched -hatching -hay -hcl -hcv-1a -head -healing -healthy -hearing -heart -heart-type -hearts -heat -heated -heating -heavily -heavy -heavy-chain -heel -heifers -height -heights -hektoen -helical -helicase -helicobacter -helix -helper -hemagglutinin -hematocrit -hematoxylin -hematuria -hemisphere -hemispheres -hemispheric -hemodialysis -hemoglobin -hemolysis -hemophan -hemorrhage -hemorrhagic -heparin -hepatic -hepatocytes -heptane -herculite -hereford -heritability -heroin -herring -hertfordshire -hetastarch -heterochromatic -heterogeneous -heteronomous -heterophil -heterophils -heterosis -heterozygosities -heterozygosity -heterozygotes -hexagonal -hexamethylmelamine -hexanucleotide -hibernating -high -high- -high-affinity -high-grade -high-yield -high-yielding -higher -highest -highly -hilltop -hinge -hip -hippocampal -hippocampus -his -hispanic -hispanics -histamine -histidine -histidines -histocompatability -histocompatibility -histology -histone -histones -hoagland -hodgkin -hodgkin's -hoechst -hoffman -hoffmann -holland -holstein -home -homogenate -homogeneity -homogeneous -hooded -hopelessness -hopping -horizontal -hormonal -hormone -horse -hospital -hospitals -host -hostility -hot-iron-branded -houghton -hour -hours -hubbard -hue -hulls -human -humans -humerus -humidity -humoral -hyaluronidase -hybrid -hydralazine -hydrated -hydration -hydrochloride -hydrochlorothiazide -hydrocortisone -hydrogen -hydrolysate -hydromorphone -hydrophilic -hydrophobic -hydroxyurea -hydroxyzine -hyperactive -hypercholesterolemic -hyperglycemia -hyperglycemic -hypericin -hyperlipemic -hyperlipidemic -hyperoxia -hyperoxic -hyperplasia -hyperplasias -hyperpolarizing -hyperstimulated -hypertension -hypertensive -hypertensives -hyperthermia -hyperthyroid -hyperthyroidism -hypertonic -hypertrophic -hypertrophied -hypertrophy -hyperventilation -hyphal -hypocapnia -hypochromic -hypokinesis -hypophysectomized -hypophysectomy -hypopituitarism -hypopneas -hypotension -hypothalamic -hypothalamus -hypothermia -hypothyroid -hypothyroidism -hypoxanthine -hypoxemia -hypoxia -hypoxic -hypsarrhythmia -hysterectomized -hz -i -i.e. -i/sigma -ibuprofen -ichthyosis -icodextrin -icosahedral -identification -identified: -identities -identity -idiopathic -ifosfamide -ikeda -ileum -illuminance -image -imaging -imidazoline -imipramine -immature -immediately -immobilized -immune -immuno -immunoassay -immunoassayable -immunological -immunoreactive -immunoreactivity -impaired -impairment -impedance -implant -implantation -implanted -implantology -implants -implications: -impoverished -impression -improvement -impulsivity -in -in: -inactivated -inactivation -inactivator -inactive -inattentive -incentives -incidence -incisor -incisors -inclined -include -include: -included: -includes: -including -increases -increasing -indanestrol -indapamide -independent -indeterminate -index -indexes -indian -indians -indicate: -indices -indirect -individual -individually -individuals -indole -indomethacin -indoor -induced -inducer -inducible -ineffectiveness -inertance -inertia -inertial -infants -infarct -infarcted -infarction -infected -infection -infections -infectious -inferior -infiltrative -inflammation -influx -information -information: -infraspinatus -infusion -ingestion -inguinal -inhalation -inhibin -inhibited -inhibition -inhibitor -inhibitors -inhibitory -inion -initial -initiation -initiator -injection -injured -inner -innervated -innovated -inosine -inositol -input -inserted -insertion -inside -inspection -inspiration -inspiratory -institutionalized -instrumentality -instruments -insufficiency -insulation -insulin -insulinaemia -insulinemia -insurance -intact -intake -integral -intensities -intensity -intensive -interaction -intercalated -intercept -interchange -interest -interest: -interference -intermediate -intermittent -internal -international -internists -internship -interrupted -interstitial -interval -intervening -intervention -interview -intestinal -intestine -intima -intimal -intracellular -intromissions -intron -inulin -invariant -invasive -inversion -investigated: -involved -involved: -involves: -iodide -iodine -ion -ionomycin -ions -iopamidol -iota -ipratropium -ipsilateral -irbesartan -iron -irradiance -irradiances -irradiated -irradiation -irregular -irrigated -irrigation -irritant -irritants -is -is: -ischaemia -ischaemic -ischemia -ischemic -isethionate -iso-treated -isoflurane -isoform -isolated -isoleucine -isometric -isoniazid -isonymy -isoprenaline -isoproterenol -isotropic -issues: -isthmic -isthmus -italy -jacket -japanese -jejunal -jejunostomy -jejunum -jersey -jet -joggers -joining -joint -jointless -josamycin -joule -joules -jugular -juice -junction -junctional -juvenile -juxtamembrane -k -k(+) -k+ -kaempferol -kainate -kallidin -kallikrein -kanamycin -kanzo -kaolinite -kappa -karagouniko -kassinin -keloid -keloids -kelvin -keratin -keratinizing -keratinocytes -keratins -keratitis -keratometry -ketac-endo -ketamine -ketanserin -keto -ketoconazole -ketoprofen -ketorolac -ketotifen -kidney -kidneys -killer -kilodalton -kilodaltons -kindergarten -kinetic -kinetochores -kinetoplast -kininogen -kirschner -kirsten -klebsiella -knee -knees -know -knowledge -known -komarov -koniocellular -kooliner -korotkoff -kringle -kringles -kubicek -kulchitsky -kupffer -kurtosis -kwashiorkor -l-692,429 -l-ala-d-glu -l-leucine -l-name -l929 -labetalol -laboratory -labour -lacidipine -lactalbumin -lactase -lactate -lactating -lactation -lactitol -lactobacilli -lactobacillus -lactose -lactulose -lacunes -lambda -lamella -lamellae -lamellar -lamina -laminin -lamprey -landrace -langendorff -langmuir -language -lansoprazole -laparoscopic -laparoscopy -laparotomy -lard -large -larger -larynx -lasalocid -laser -lasting -latamoxef -late -latencies -latency -latent -lateral -lateralis -latum -layer -layers -laying -lead -leader -leaflets -lean -learning -leaves -lecithin -left -left-handed -leg -leghorn -legionella -legs -leiomyoma -leishmania -length -lengths -lepromatous -leptin -lesion -lesional -lesioned -lesions -less-soluble -let -lethal -leu -leu-m1 -leucine -leucocyte -leucovorin -leukemia -leukocyte -levamisole -levcromakalim -level -levels -levo -levobupivacaine -levofloxacin -levorotatory -lewis -lewisite -li -liable -lidocaine -lie -life -ligand -ligands -ligation -light -light-chain -light-cured -lighter -lightness -lignin -likelihood -limb -limbal -limited -limousin -lincomycin -lincosamide -line -linear -linearity -lingual -linker -linoleate -linoleic -lipase -lipid -lipids -lipocalin-type -lipofectin -lipopolysaccharide -liposomal -liposome -liposomes -lipreading -liquid -liquiritigenin -lire -lisinopril -lisuride -liter -liters -litter -little -live -liver -liver-type -load -loaded -loading -lobe -local -location -locus -log -london -long -long- -long-chain -long-lasting -long-lived -long-styled -long-wave -long-wavelength -long-wavelength-selective -long-wavelength-sensitive -longer -longevity -longissimus -longitudinal -loop -loops -loratadine -lorazepam -losartan -loss -loss" -lovastatin -low -low- -low-affinity -low-grade -low-yield -lower -lumbar -lumen -luminal -luminance -luminol -luminosity -lumpectomy -lung -lungs -lupin -luteal -lutein -luteolin -lymph -lympho- -lymphocyte -lymphocytes -lymphoid -lymphoma -lysine -lysis -lysosomal -lysozyme -m -m-5041t -m412 -mackay -macrolide -macrophage -macrophages -macula -madison -magnesium -magnetization -magnification -magnitude -magno -magno- -magnocellular -main -maintain -maintenance -maize -major -malabsorption -malathion -male -males -malignant -malnourished -malnutrition -maltodextrin -maltose -mammal -mammalian -mammography -mandible -mania -manics -mannan -mannitol -mannose -mannozym -mannuronic -manual -maori -marcus -margarine -margin -marginal -marijuana -marker -markers -marrow -marrow-dependent -masculine -masculinity -masked -mass -masses -masseter -mastectomy -mastoid -match -matching -maternal -matrix -maturation -maturational -mature -mauthner -maximal -maximum -meal -mean -meaning -meaningfulness -means -measles -measured -measures -meat -mecamylamine -mechanical -meclofenamate -media -medial -medialis -median -mediastinal -mediastinum -medical -medium -medium- -medium-sized -medium-wavelength -medium-wavelength-sensitive -medulla -medullary -mefloquine -megakaryocyte -megakaryocytes -meiotic -meishan -melanin -melanocytes -melanoid -melanoma -melatonin -melittin -meloxicam -melphalan -membrane -membrane-associated -membrane-binding -membrane-bound -membraneous -membranes -membranous -memory -men -meningiomas -meningitis -menses -menstrual -menstruation -meperidine -mepivacaine -merino -meropenem -mesencephalon -mesenchyma -mesenchyme -mesenteric -mesial -mesodermal -mesometrial -mesophyll -mesor -mesothelial -mesothelioma -messenger -mesterolone -mestranol -met -metabolic -metabolism -metabolite -metabolites -metabolized -metacentric -metacentrics -metacyclic -metal -metallic -metallicolous -metals -metanephrine -metaphase -metaproterenol -metarhodopsin -metastases -metastasis -metastatic -meters -metestrus -metformin -methacholine -methadone -methamphetamine -methanol -methicillin -methionine -methohexitone -methomyl -methotrexate -methoxamine -methoxychlor -methoxyflurane -methyl -methylanthranilate -methylase -methylated -methyldopa -methylergometrine -methylisobutylxanthine -methylprednisolone -methysergide -metoclopramide -metoprolol -metronidazole -metyrapone -mexiletine -mg -mg/kg -mg/m2 -mice -microalbuminuria -microelectrode -microfold -microg/kg -micromagnets -micronutrients -micropyle -microsomal -microsomes -microsporidia -microvillous -mid -mid-myocardial -mid-portion -mid-sized -mid-styled -midazolam -middle -middle- -middle-aged -middle-envelope -middle-size -middle-sized -middle-wave -middle-wave-sensitive -middle-wavelength -middle-wavelength-sensitive -midlobular -midmyocardial -midmyocardium -migraine -migrants -migration -mild -milk -milking -milkofix -million -millions -milrinone -min -mineral -miniature -minimal -minocycline -minor -minoxidil -minute -minutes -miokamycin -mirex -mirror-image -miscellaneous -misoprostol -missing -misty -mite -mitochondria -mitochondrial -mitomycin -mitoses -mitosis -mitotic -mitoxantrone -mitral -mivacurium -mixed -mixture -mj -ml(-1) -mm -mmol/l -mobility -modafinil -model -moderate -moderately -modification -modified -modifier -modulation -modulator -moduli -modulus -moisture -mol/l -molal -molality -molar -molars -molecule -moles -mollicutes -moloney -molsidomine -molt -moment -moments -monensin -monetite -money -mongrel -moniliformin -monitoring -mono- -monoamine -monoclinic -monoclonal -monocrotaline -monocyte -monocytes -monomer -monomeric -monomers -mononuclear -monophasic -montelukast -month -months -montmorillonite -more-soluble -morning -morphine -morphogenetic -morphology -mortality -morula -morulae -mosaic -mother -motilin -motor -mounts -mouse -mouth -movement -moving -mu -mucin -mucinous -mucoid -mucosa -mucosal -mucosal- -multiparous -multiple -murine -muscarine -muscarinic -muscle -muscle-specific -muscle-type -muscles -muscular -music -mutagenic -mutant -mutants -mutated -mutation -mutation, -mutations -mutator -muzolimine -myasthenia -mycelial -mycelium -mycobacterium -mycoplasma -mycorrhizal -myelin -myeloid -myocarditis -myocardium -myoglobin -myometrial -myometrium -myopathy -myosin -myricetin -mystus -n -n-terminal -n-terminus -nabilone -nacl -nadolol -nafcillin -naive -naked -naloxone -naltrexone -nandrolone -naphthalene -naproxen -narrow -nasal -nasion -nasopharynx -native -native-like -natural -nausea -nebivolol -nebuchamber -neck -necrosis -nefazodone -negative -negentropy -neglect -neighborhood -neither -nematic -neomycin -neonatal -neopterin -neopterins -neostigmine -neostriatum -neoxanthin -nephrectomized -nephrectomy -nephropathy -nerve -nerves -nervousness -netilmicin -network -neural -neuraminidase -neurinomas -neuroblast -neuroblastic -neuroblastoma -neurologic -neuronal -neurons -neuropathy -neuropeptide -neurotensin -neuroticism -neutral -neutralisation -neutralization -neutralizing -neutron -neutrons -neutrophil -neutrophils -newborn -newborns -newton -newtons -nh2-terminal -niacin -nicardipine -nicholas -nicorandil -nicotine -nicotinic -nifedipine -night -nighttime -nimesulide -nimodipine -nipple -nisin -nisoldipine -nitazoxanide -nitrate -nitrendipine -nitrification -nitrite -nitro -nitrogen -nitrogenous -nitroglycerin -nitroimidazole -nitroprusside -nitroxide -nizatidine -no -nociceptive -nocturnal -nodal -node -nodes -nodular -nodule -noise -non-arthritic -non-blood-fed -non-cf -non-sclerotic -non-stimulated -non-tumor -non-users -nonatopic -none -nonlepromatous -nonlinear -nonlinearity -nonreinforced -nonreward -nonsmokers -nonsynonymous -nontemplated -noradrenaline -norepinephrine -norfloxacin -norgestomet -normal -normalized -normally -normals -normoalbuminuria -normocapnia -normokinesis -normolipidemic -normospermic -normotension -normotensive -normotensives -normotonic -normoxia -normoxic -north -northern -norverapamil -noscapine -not -notch -nourished -novaron -novel -november -novice -noxious -nuclear -nuclei -nucleo- -nucleobase -nucleocapsid -nucleocaspid -nucleophile -nucleoprotein -nucleotide -nucleotide-binding -nucleotides -nucleus -null -nulliparous -number -numbers -nurses -nursing -nutrient -nutrition -nymphs -nystatin -nzb/blnj -o -oats -obese -obesity -object -objective -oblique -observed -observer -obstruction -obstructive -occipital -occlusion -occupation -octahedral -odor -of -ofloxacin -often -oil -olaquindox -old -older -oleate -oleic -olein -oleoyl -oligodendroglioma -oligodendrogliomas -oligosaccharides -omeprazole -oncovin -ondansetron -only -oophorectomy -opaque -open -opening -openness -operator -opposite -oral -orally -orange -orbit -orchardgrass -orchidectomized -orchiectomy -organic -organomegaly -orientation -orientational -orlistat -orosomucoid -oroxylin-a -orthorhombic -orthostatism -ossimi -osteocalcin -other -ouabain -outcome -outcomes -outdoor -outer -output -outside -ovalbumin -ovariectomized -ovariectomy -ovary -overload -ovine -oxaliplatin -oxatomide -oxidase -oxidation -oxidized -oxmetidine -oxygen -oxytetracycline -oxytocin -p -p-aminobenzamidine -p-hydroxyphenyl -pace -pachytene -pacing -pacinian -paclitaxel -paddles -pain -pair -pair-fed -paired -palatal -palindromic -palm -palmitic -palmitoyl -palpation -pamidronate -pancreas -pancreatectomy -pancreatic -pancuronium -pantethine -pantoprazole -papain -papaverine -paper -papilla -paracetamol -parallel -parameter -parameters -paraplegia -paraplegic -parasitaemia -parasympathetic -parathion -parathyroid -parenchyma -parenchymal -parent -parental -paretic -pargyline -parietal -parity -parkes -parkinsonism -paromomycin -parotid -partial -particulate -parvo -parvocellular -passage -passages -passive -pastoralists -patch -patency -pathogenic -pathologic -pathological -pathologically -pathology -pathways -patient -patients -pattern -patterns -peak -peaked -peat -peats -pectin -pediatric -pedicled -pefloxacin -pellet -pellets -pelvis -penetrating -penetration -penicillamine -penicillin -penicylinders -pentagastrin -pentazocine -pentobarbital -pentoses -pentoxifylline -peplomycin -peptide -peptides -peptidyl -peptidyl-trna -percentage -percentile -percussion -perforating -perforin -performance -perfusion -pericellular -pericytes -perimeter -perindopril -perinuclear -period -periodontitis -periosteal -peripheral -periportal -peritoneal -peritoneum -peritonitis -periventricular -permanent -permeabilities -permeability -permeation -permer -permixon -peroneal -peroxidase -peroxide -peroxy -perpendicular -persistence -persistent -persisting -personal -pertussis -ph -phagocytosis -pharmacy -pharyngeal -pharynx -phase -phases -phenanthrene -phenol -phenotype -phentolamine -phenylbutazone -phenylephrine -phenytoin -pheochromocytoma -phlebotomy -phlorizin -pholcodine -phosphate -phosphate-binding -phosphates -phospho -phospho- -phospholipid -phosphoprotein -phosphoproteins -phosphoramidon -phosphorous -phosphorus -phosphorus-31 -phosphorylated -phosphorylation -photosensitivity -photosynthesis -phoxim -phthalazine -physical -physicians -pi -pi(hs) -pietrain -pig -pill -pilocarpine -pimozide -pinacidil -pindolol -pinhole -pink -pink-eyed -pinzgauer -piperacillin -piracetam -pirenzepine -piroxicam -pitch -pituitary -placebo -placebo-controlled -placebos -placental -plagiocephaly -plain -plant -plantaris -plants -plaque -plasma -plasmacytoid -plasmapheresis -plasmid -plasmin -plasminogen -plastic -plateau -platelet -platelet-type -platelets -platinum -pleomorphic -plication -plosive -plus -pmoles -pneumonia -pneumoperitoneum -point -poise -poland -polar -polarity -polarization -polarizations -poleward -polished -pollock -polluted -poly -polyclonal -polydispersity -polymer -polymerase -polymerase-associated -polymeric -polymorphic -polymorphism -polyneuropathy -polypay -polypeptide -polypeptides -polyphenols -polyps -polyunsaturated -polyvinylpyrrolidone -pons -pontine -pony -poor -population -populations -porcelain -porcine -pore -pore-forming -pork -porosity -porphyrin -portsmouth -position -positions -positive -post -post- -post-natal -posterior -posteriorly -posthatching -postnatal -postnatally -postpartum -postural -potassium -potato -potency -potential -power -ppm -practice -pravastatin -prazosin -pre-existing -pre-pubertal -precipitation -precision -precursor -precursors -predictability -predicted -prednisolone -prednisone -predominant -preeclampsia -preference -preferred -preferring -pregesterone -pregnancies -pregnancy -pregnant -pregnenolone -preload -premature -prenalterol -prenylamine -preparation -preperitoneal -prepuberal -presence -present -pressor -pressure -pressure-loading -pressure-sensitive -pressures -preterm -prevalence -preventable -primary -primer -priming -primiparous -primitive -principal -pristinamycin -pro -pro-oestrus -probabilities -probability -probe -probenecid -problem -probucol -procainamide -procaine -procarbazine -procedure -procedures -processes -processing -procholeragenoid -product -production -productive -productivity -proestrous -proestrus -profile -prog -progenitor -progesterone -progesterone's -progesterone, -progesterone- -progestin -progestins -progestogen -progestogens -program -progression -progressive -progressor -progressors -proinflammatory -prolactin -proliferating -proliferation -proliferative -proline -promethazine -promoter -promoters -promotion -prone -propacetamol -propafenone -propanediol -properdin -prophylactic -propionate -propofol -proportion -proportional -proportions -propoxyphene -propranolol -prostate -prostatic -protamine -protease -protected -protection -protein -protein-rich -proteins -proteinuria -proteolytic -protocol -protocols -protoplasmic -prototype -protozoa -protruding -protrusion -protrusive -proximal -pseudoanodontia -pseudomonas -pseudorotation -psoralen -psychiatric -psychological -psychoticism -pugh-child -pulmonary -pulsatile -pulse -pulses -pulvinar -pump -puncture -pupae -purified -purine -purinergic -purkinje -push -putamen -putrescine -pyramidal -pyrene -pyridine -pyridostigmine -pyridoxine -pyrimethamine -pyrolysis -pyruvate -q -q10 -quackenbush -quadriceps -quadrivalent -quadrupole -quality -quantitative -quantity -quartile -quartiles -quenchers -quercetin -query -questionnaire -queuine -queuosine -quiescence -quiescent -quiet -quinacrine -quinapril -quinidine -quinine -quinoid -quinoline -quinone -quinones -quintiles -quisqualate -quotient -quotients -rabbit -rabeprazole -rac, -rac-, -racemic, -radial -radiation -radicals -radiculography -radii -radioactive -radioiodinated -radiolabeled -radiotherapy -radius -rads -raffinose -rahmani -raillietina -raloxifene -raman -rambouillet -ramipril -rams -ran -random -randomness -range -ranitidine -rankin -rapid -rarefaction -rarely/never -rat -rate -rates -ratio -ration -ratios -rats -rats, -rauscher -raw -rcc-36, -reabsorption -reacted -reaction -reaction, -reactions -reactive -reactivity -reagent -reagent, -reagents -rear -rearranged -rearrangement -reasoning -rebreathing -recalls -received -recently, -receptive -receptor -receptor-binding -receptor-regulated -receptors -receptors, -recession -recessive -recipient -recipients -reciprocal -recirculation -recognized -recollection -recombinant -recombinase -recovery -rectal -rectum -recurrent -red -red-eye -red-light -reduced -reducing -reductase -reduction -reference -reflectance -reflex -reflexes -reflow -refractile -refraction -refractoriness -refractory -region -regional -regions -regression -regressor -regular -regulator -regulatory -regurgitant -regurgitation -rehabilitation -rehydration -reinforced -reinforcement -rejection -relapse -relapsers -relapsing -related -relatedness -relation -relationship -relative -relatives -relaxation -relaxed -relaxin -relaxing -release -relevant -reliabilities -reliability -remaining -remember -remifentanil -remission -removal -removed -renal -renin -renshaw -reoxygenation -repeat -repeatability -repeated -repeats -reperfused -reperfusion -repetitive -replacement -replicase -repolarization -repression -repressor -reproducibility -reproductive -resection -reserpine -resident -residents -residual -residue -residues -resistance -resistances -resistant -resistive -resistivity -resistor -resolution -resolve -resolved -resonance -respective -respiration -respiratory -responded -responder -responders -responding -response -responses -responsive -rest -rested -resting -restrained -restraint -restricted -restriction -resulting -results: -resuscitation -resveratrol -retard -retardation -retentate -retention -reticular -reticulocytes -retina -retinol -retinular -retraction -retransfusion -retrograde -retroperitoneal -retzius -reverse -reward -rewarded -rhabdomyosarcoma -rhamnose -rheumatism -rhinitis -rhodamine -rhodopsin -rhombohedral -rhombomere -rhombomeres -rhythmic -ribavirin -ribose -ribosomal -ribosomes -rice -ricin -rickard -ridgelands -rifampicin -rifampicine -rifampin -rigevidon -right -right-handed -rigidified -rigidity -rilmenidine -ring -rinn -ripe -risk -ristocetin -ritonavir -rituximab -rna -rocuronium -rodent -roentgen -roentgens -rolipram -romanov -root -ropivacaine -rostral -rotation -rough -roughness -round -roundness -routine -rower -roxatidine -rs14203, -rubella -rudimentary -ruminal -ruminant -run -runners -running -rupture -rural -rutin -ryanodine -rye -s -s-methyl -s-phase -saccade -saccharose -sacrum -sagittal -salbutamol -salicylate -saline -salinity -salinomycin -saliva -salivary -salivary-like -salmeterol -salmon -salt-hypertension-sensitive -salt-sensitive -same -sample -samples -sand -saponins -sarafotoxin -sarcoidosis -sarcomatoid -sardi -sarsaponin -sartorius -satisfactory -saturated -saturation -scale -scao -scar -scatter -scattering -scavenger -schistosoma -schizophrenia -schizophrenics -sclerosis -scmc -scopolamine -score -scores -seated -secobarbital -second -secondary -seconds -secreted -secretin -secretion -secretory -sections -sedation -sedentary -sedimentation -segment -segmental -segments -select -selected -selection -selective -selegiline -selenium -self -self-concept -self-incompatibility -sella -semiconducting -seminoma -seminomas -semitendinosus -senescent -sense -sensibility -sensing -sensitive -sensitivity -sensitized -sensitizer -sensolog -sensory -sentence -separation -sephadex -sepiolite -sepsis -septal -september -septic -septum -sequence -sequence, -sequential -ser -sera -serial -series -series, -serine -serine-rich -serines -serosa -serosal -serotonin -serovar -serres -serum -serum-soluble -services -several -severe -severity -sevoflurane -sex -shade -shaft -shakuyaku -sham -sham-branded -sham-operated -sham-pinealectomized -sharpness -shear -sheep -shell -shift -shock -shocked -short -short- -short-wave -short-wave-sensitive -short-wavelength -short-wavelength-sensitive -shortening -shorter -shorthorn -shortwave-sensitive -shoulder -showed -shunt -siblings -sibutramine -sickle -side -sigmoidicity -signal -significant -silage -silane -silent -silver -silymarin -similarity -similarly, -simmental -simple -simultaneous -simvastatin -since -single -single-stage -singlet -sinus -sinusoids -site -sites -sitting -size -sizes -skeletal -skeletonized -skin -skull -sleep -slices -slides -slight -slope -slopes -slow -slow-resistant -slow-twitch -sludge -sluggish -small -small-surface -smaller -smallest -smoke -smokers -smoking -smooth -snout -social -sodium -sodium-sensitive -softness -soil -solcoseryl -soleus -solid -solitus -solubilities -solubility -soluble -solute -solution -solvent -somatic -somatomedin -somatosensory -somatostatin -somatotropin -somite -somites -sonicare -sorbitol -sotalol -sound -south -southern -sows -soy -soybean -space -spacer -spacings -spain -spatial -specialists -species -specific -specificity -spectinomycin -spectrophotometric -speed -spermidine -spherical -sphingomyelin -sphingosine -spike -spin -spinal -spine -spinnbarkeit -spirometry -spironolactone -spleen -splenectomized -splenic -splenocytes -spontaneous -sporadic -spot -spotting -spray -spread -spreading -spretus -spring -sprint -sprint-trained -sprinters -sprotte -sr48968 -sramek -stabe -stabilization -stable -stacked -stage -staging -standard -standard, -standards -standing -stanozolol -staphylococcus -star -starch -start -starvation -starved -state -static -stationary -stature -stearic -stearylamine -stenosis -stenting -step -stereopreference -sterile -steroid -steroids -stiffness -stilbenedisulfonates -stimulated -stimulating -stimulation -stimuli -stimulus -stoichiometry -stomach -stool -storage -strain -strata -stratum -straw -strength -streptokinase -streptomyces -streptomycin -stress -stressed -stretch -striatal -striatum -strict -stripping -stroke -stroma -stromal -strong -strongly -structure -students -studies, -study -study, -styrene -subcutaneous -subiculum -subject -subjective -subjects -submerged -subordinates -subpopulations -substance -substituted -substitution -substrate -substrate, -substrate-adherent -substrates -subtilisin -subtypes, -subunit -success -successful -succinate -succinylcholine -suckled -suckling -sucralfate -sucrase -sucrose -sufentanil -suffolk -sugar -sulfadiazine -sulfadoxine -sulfamethoxazole -sulfatase -sulfate -sulfated -sulfates -sulfinpyrazone -sulfisoxazole -sulfite -sulfonate -sulfoxide -sulfoxide, -sulfur -sulglicotide -sulodexide -sulphamethoxazole -sulphate -sulphide -sulphur -sulpiride -sum -summation -summer -superconducting -superficial -superior -superlative -supernatant -supernatants -supination -supine -supplement -supplementation -supplemented -supplies -support -suppressor -suprofen -suramin -surface -surfaces -surfactant -surgeon -surgeons -surgery -surgical -survanta -survey -survival -survivors -susceptibility -susceptible -susceptibles -suspect -suspended -suspension -sustained -suture -sweden -swedish -swimmers -swimming -swiss -switch -syk -symmetric -symmetrical -sympathetic -symptom -symptomatic -symptoms -syn -synaptic -synchronization -synchronous -synchrony -syncope -synonymous -synthase -synthesis -synthesized -synthetic -syringyl -sysomicin -system -system, -systemic -systems, -systole -systolic -t -t-cell -tablet -tablets -tachycardia -tacrolimus -tactile -tail -taiwanese -tallow -tamoxifen -tandem -tap -tape -tar -tarentaise -target -targets -targhee -task -tau -taurine -taut -taxol -taxotere -tazobactam -tear -technical -tectum -teeth -tegafur -teicoplanin -telangectasia -telmisartan -telomere -telomeric -telophase -telsa -temperature -temperatures -tempering -template -temporal -tendinous -tendon -teniposide -tense -tensed -tension -tensions -tenue -teratoma -teratozoospermia -terazosin -terbutaline -terfenadine -term -terminal -terminator -terminators -territorial -tertatolol -tesla -test -tester -testerone -testing -testis -testosterone -testosterone's -testosterone, -tetanus -tetracaine -tetracycline -tetragonal -tetrahedral -tetramethylrhodamine -tetranychus -tetraplegic -tetraploid -texas -thalamus -thalidomide -that -the -theca -thelytoky -theophylline -therapy -therapy-related -thermophilic -theta -thiacetazone -thiamin -thiamine -thickness -thicknesses -thigh -thighs -thin -thin-thread -thiolation -thionein -thiopental -thiopentone -thiophene -thiophosphamide -thioridazine -thiotepa -this -thomsen-friedenreich -thoracic -thr -threat -threonine -threshold -thresholds -thrombin -thrombocytopenia -thrombolysis -thrombosis -thrombus -thymic -thymic-dependent -thymic-derived -thymidine -thymine -thymines -thymoma -thymus -thymus- -thymus-dependent -thymus-derived -thymus-processed -thyroid -thyroidectomized -thyroidectomy -thyroxine -tianeptine -tiapamil -tibetans -tibia -tibial -tibolone -ticlopidine -tilapia -tilt -time -time-reversal -timed -times -timolol -tinidazole -tiny -tip -tissue -tissue-type -tissues -titanium -titer -tizoxanide -to -tobacco -tobramycin -tocopherol -tocopherols -tolbutamide -tolerance -tolerant -tolmetin -toluene -tone -tongue -tonic -tonnes -tons -tonsillectomy -top -topotecan -toronto -torpedo -torque -torr -torsion -torsional -total -toth -touch -tourniquet -toxicity -toxoid -tpsic -tq -trabecular -tracheal -traditional -trained -traineeship -training -tramadol -trandolapril -trans -transcribed -transducin -transection -transfer -transferred -transferrin -transformed -transfusion -transgenic -transient -transit -transit-time -transition -transitional -translation -translational -translocation -transmembrane -transmission -transmissivity -transmittance -transpiration -transplant -transplantation -transplanted -transplants -transport -transverse -trastuzumab -trauma -traumatic -trays -treadmill -treated -treatment -treatments -trehalase -trehalose -tremor -trendelenburg -trenimon -tri- -trial -triamcinolone -triamterene -triazolam -triceps -trichophyton -triclinic -tricuspid -tricyclic -triglycerides -trimer -trimethoprim -trimipramine -tripelennamine -triple -triplet -triptolide -trocar -trochanter -troglitazone -tropisetron -trough -true -truncated -trypanosoma -trypsin -tryptamine -tryptase -tryptic -tryptophan -tuberculoid -tubes -tubular -tubule -tubulopathic -tuli -tumor -tumor-bearing -tumorlike -tumorous -tumors -tumour -tumours -turbuhaler -turmeric -turned -tuscany -twin -two -tylosin -tymidine -type -tyr1 -tyramine -tyrosine -tyrosol -tytin -u-0521 -ubiquitinated -ulceration -ulcers -ulinastatin -ulnar -ultrasound -unaffected -uncertain -uncertainty -unchanged -unclassifiable -unclassified -unconjugated -uncontrolled -undernourished -undernutrition -undifferentiated -unfavourable -unfolded -unfractionated -uniformly -unilateral -uninfected -uninvolved -unipolar -unit -united -units -units/ml -unmodified -unrelated -unresolved -unresolved/disorganized -unrestricted -unsaturated -unstimulated -untrained -untreated -up -upper -upright -upstream -uptake -uracil -uraemic -uranium -urapidil -urban -urea -urecholine -uremia -uremic -ureter -urethane -urgency -uridine -uridines -uridylate -urinary -urine -urokinase -users -using -usual -uterine -uterus -utilization -v -v-thread -vacancies -vaccinated -vaccination -vaccine -vacuolar -vacuolar-type -vagal -vagina -vaginal -vaginally -vagotomy -val -valency -validation -valine -valsalva -valsartan -value -valves -vanadate -vanadium -vancomycin -var -variability -variable -variable-region -variance -variant -variants -variation -varicella -varicocele -variety -varnish -vascular -vasculitis -vector -vecuronium -vegetable -vegetarian -vegetarians -vegetative -vehicle -vein -veins -velocities -velocity -venlafaxine -venoles -venom -venous -ventilated -ventilation -ventilatory -ventral -ventricle -ventricles -ventricular -venules -verapamil -veratridine -verb -verbal -vermilion -verprolin-homology -vertebrae -vertebral -vertical -vesicle -vesicles -vesicular -vessels -vestibular -vetch -viability -viable -vibrational -vigor -villi -villin -vimentin -vinblastine -vinclozolin -vincristine -vindesine -vinorelbine -violaxanthin -violet -viral -virchowian -virgin -virion -virulence -virulent -virus -viruses -visceral -viscosity -vision -visits -visual -visual-only -visually -vitality -vitremer -vivarium -vivonex -vmax -voltage -voltages -volts -volume -volumes -volumetric -volunteers -vomiting -vomitoxin -vowel -w(peak) -waist -wake -wakefulness -waking -walking -wall -warfarin -warm -warm-sensitive -warmed -warmth -washed -water -watermelon -watt -watts -wave -weak -weakly -weaned -weanimix -weaning -wedge -wedged -week -weekly -weeks -weighed -weight -weighted -weights -well-differentiated -well-nourished -west -western -wet -wettedness -wheat -wheezing -white -white-eye -white-spotting -whites -whole -whorls -width -wiener -wiktor -wild -wild-type -wilted -wine -winter -wistar -with -withdrawal -wogonin -wollastonite -women -word -work -workload -workshop -wrinkled -wrist -x -x-linked -xanthan -xanthine -xbai -xenopus -xylazine -xylenes -xylitol -xylose -y -year -years -yeast -yeast-like -yeasts -yellow -yellowing -yield -yields -yogurt -yohimbine -yoked -yolk -yorkshire -young -younger -yttrium -z100 -zafirlukast -zearalenone -zeatin -zeaxanthin -zebrafish -zebu -zeolite -zeranol -zeste -zeta -zofenopril -zolpidem -zomepirac -zooplankton -zopiclone -zoster -zucker -zygapophysial -zymodeme -zymodemes -zymosan +a +a-wave +a/j +abdominal +abnormal +abortus +abscesses +absence +absent +absolute +absorbance +absorbency +absorptance +absorption +acarbose +acceleration +acceptor +acceptors +accessory +accumulation +accuracy +acebutolol +acetaldehyde +acetamide +acetaminophen +acetate +acetazolamide +acetoacetylation +acetone +acetonitrile +acetylcholine +acid +acidic +acids +acini +aconitine +acquired +acquisition +acrocentric +acromegaly +act +actin +action +action: +activated +activation +activator +active +activities +activitrax +activity +activity: +actuator +acupuncture +acute +addition +additive +addressed: +adductor +adducts +adenine +adenines +adenoidectomy +adenoma +adenomas +adenosine +adenylate +adenylation +adequate +adherent +adipocyte +admission +adrenalectomy +adrenalin +adrenaline +adrenaline-containing +adrenaline-storing +adrenergic +adriamycin +adult +adults +advanced +advantages: +adventurous +aerobacter +aerobic +aeromonas +affected +affective +afferent +african +africans +after +afternoon +age +aged +aggregated +aggression +aggressive +agonist +agonists +agouti +agreeableness +aims: +air +airways +akimbo +al(2)o(3) +ala +alanine +albumin +albuterol +alcohol +alcoholic +alcoholism +aldolase +aldosterone +alert +alfentanil +all-adenine +allele +alleles +allergic +allergy +allopurinol +alloxan +almitrine +alone +along +alopecia +alpha +alpha-methyldopa +alprazolam +alternating +altitude +alveolar +always +ambulatory +amenorrheic +american +amifostine +amikacin +amiloride +aminoacyl +aminoacyl-trna +aminophylline +amiodarone +amitriptyline +amlodipine +amniotomy +amodiaquine +amorphous +amount +amounts +amoxicillin +amoxycillin +amperes +amphetamine +ampicillin +amplified +amplitude +amplitudes +ampulla +ampullary +amrinone +amygdala +amylase +amyloid +amyloidosis +an +anaemia +analgesia +analogue +analysis +analysis, +analysis: +analyzed: +and +and/or +androgen +androgens +andromonoecious +androstendione +androstenedione +androsterone +anemic +anergic +anesthesia +anesthetized +anestrous +anestrus +aneuploid +aneuploidy +angina +angiography +angiotensin +angle +angstrom +angstroms +angus +aniline +animal +animals +anionic +anions +aniridia +ankyloglossia +annulus +anomalies +anomaly +anorexia +anoxia +antagonist +anterior +anteriorly +antheraxanthin +anthracene +anthracycline +anthracyclines +anti +anti-lewis +antibiotic +antibodies +antibody +antigen +antipyrine +antrectomy +antrum +anxiety +aorta +aortic +apex +apical +apnea +apneas +apo +apolipoprotein +apolipoproteins +apomorphine +apoprotein +apoptosis +approaches: +aprepitant +aprotinin +aqueous +arabinoside +arachidonate +arches +are +are: +area +areas +areas: +argas +arginase +arginine +arm +arms +arrhythmia +arterial +arteries +arterioles +artery +arthritic +as +as: +ascites +asians +asked: +aspects: +aspirin +assimilation +associative +assumptions: +asthma +asthmatic +asthmatics +astrocytes +astrocytoma +astrocytomas +asymmetric +asymmetrical +asymmetry +asymptomatic +asymptote +asymptotic +at +ataxia +atenolol +atherogenic +athletes +atomic +atopic +atorvastatin +atracurium +atrazine +atresia +atretic +atria +atrial +atrium +atropine +attenuated +attenuation +atypical +aubert +auditory +auditory-alone +auditory-only +austria +austrian +autism +autoclaved +autologous +automatic +autonomy +autumn +auxotype +available +average +avian +avidin +avirulent +avoidant +axial +axillary +axis +azathioprine +azithromycin +b +b.i.d. +b/beijing/184/93 +b4 +baboon +background +backward +bacteremia +bacteremic +bacteria +bacterial +bacteriochlorophyll +bacteriochlorophylls +bacteroides +baicalein +bakumondo-to +balance +balanced +balb/cj +balloon +band +banded +bar +barbital +barley +barrier +barring +basal +basalis +base +baseline +bases +basic +basicity +basolateral +basophils +bath +bathorhodopsin +bcnu +beads +beans +bearing +beats +becke +beclomethasone +beef +before +belgium +belgrade +beliefs +benazepril +bendamustine +bending +benfluorex +benfluron +benign +benignity +bentonite +benzamide +benzene +benzo +benzoate +benzylamide +bepridil +bermudagrass +berodual +beta +betamethasone +betaxolol +bezafibrate +bi- +biarra +bias +bicarbonate +bicycle +bilateral +bilirubin +binding +binocular +bioaccumulation +bioactive +bioactivity +bioassay +biofeedback +biological +biomass +biopsies +biopsy +biopterin +biotin +biotypes +biphasic +biphenyl +biphenylene +biphenyls +birth +bisexual +bisoprolol +bivalent +black +blackbelly +blacks +bladder +blank +bleeding +bleomycin +block +blocked +blocking +blood +blue +blunt +body +bolus +bombesin +bonding +bone +bone-marrow-derived +borderline +boron +bosentan +bottom +bottom-component +bound +bouton +bovine +boys +brace +bradykinin +brahman +brain +bran +brassica +breakdown +breakfast +breast +breast-fed +breathiness +breathlessness +bregma +brightness +brodimoprim +broiler +bromocriptine +bronchial +bronchitis +broth +brown +broxaterol +brushing +buccal +budesonide +buf/mna +buffer +bulb +bulimia +bulinus +bumetanide +bupivacaine +buprenorphine +burn +burned +bursa-equivalent +bursal +bursal-derived +bursting +buserelin +butanol +butorphanol +butter +butyl +c +c(5)h(8), +c(6)h(10), +c-peptide +c-terminal +c57bl/6j +ca2+. +caerulein +caffeine +calcitonin +calcitriol +calculus +calorie +calves +calyx +canadian +cancer +candesartan +candida +candidate +canine +capacitance +capacitor +capacity +capecitabine +capillaries +capillary +caprine +capsaicin +capsid +captopril +carbachol +carbamate +carbamazepine +carbaryl +carbohydrate +carbohydrate-rich +carbohydrates +carbomer +carbon +carboplatin +carboxy +carboxy- +carboxy-terminal +carboxyl +carboxyl-terminal +carboxylated +carcass +carcinoma +cardiac +cardiac-lethal +cardiologists +cardioversion +care +carminomycin +carnitine +carotenoid +carotid +carrageenan +carvedilol +case +casein +cases +cast +castrated +castration +casual +catabolism +catalase +catalyst +catalytic +cataract +catechin +catechol +catecholamines +catholic +cation +cationic +cations +cattle +caucasian +caucasians +caudal +caudata +caudate +cavity +cecum +cefotaxime +ceftazidime +ceftriaxone +cefuroxime +celecoxib +celiprolol +cell +cellobiase +cells +cells; +cellular +cellulose +cellulosic +celsius +cemented +cementum +cent +center +centigrade +central +centrilobular +centroid +centromeric +cephalothin +cerebellum +cerebrum +cerivastatin +cervical +cervix +cesarean +cetirizine +ch +chain +chains +chance +charolais +cheese +chemical +chemically-cured +chemiluminescence +chemotherapy +cherry +chicken +child +children +chimaeric +chimeric +chinese +chitosan +chitose +chlamydia +chloramphenicol +chlorhexidine +chlorination +chloroform +chloroquine +chlorpromazine +chlorthalidone +choice +cholate +cholecystectomy +cholesterol +cholesterols +cholestyramine +cholic +chow +chroma +chronaxie +chronic +chymase +cibenzoline +cicletanine +cilazapril +ciliated +cimetidine +ciprofloxacin +circular +circulating +circumference +circumferences +circumferential +cirrhosis +cis +cisatracurium +cisplatin +cisplatinum +cisternal +citrate +clamping +clarithromycin +class +classes +classic +classical +clearance +clearances +clindamycin +clinic +clinical +clockwise +clodronate +clomipramine +clonidine +clopidogrel +closed +closed-spiracle +clover +clozapine +cluster +clustered +clusters +cm +cm. +cm/m2 +cm3 +coactivation +coaptation +cobb +cocaine +codeine +coefficient +coefficients +coherence +cohesion +cohort +coil +colchicine +cold +cold-sensitive +collagen +collagenase +colon +colonic +colostrum +colour +columbia +columbinate +combination +combined +combustion +comfort +comfortable +common +community +compact +comparison +compensation +competence +competition +competitive +complaints +complement +complement- +complementary +complete +completeness +complex +complexed +compliance +compliances +compliant +complications +component +components +composite +compost +compounds +compression +compton +concentrate +concentration +concentrations +concise +concordance +concrete +concurrent +condensation +condensed +condition +conditioned +conditioning +conditions +conductance +cone +confederate +configuration +configurations +confined +confinement +confusion +conjugated +connecting +connection +conscientiousness +conserved +consonant +consonants +constant +constant-region +constants +constitutive +consumers +consumption +contact +contemplation +content +context +contingency +continuity +continuous +continuously +contractions +contralateral +contrast +contrasts +control +controlled +controller +controls +convection +conventional +convergent +conversion +cooh-terminal +cooling +cooperate +cooperation +cooperativeness +coopworth +copulating +copyright +cord +core +cores +corn +corneal +cornstarch +coronal +coronary +corrected +correlation +cortex +cortical +corticosteroid +corticosterone +cortisol +corynebacterium +cost +costs +cotton +coulomb +coulombs +coumarin +couplets +course +cream +creatine +creatinine +criteria +criterion +cryia +csf +cubic +culex +culture +cultures +cuprophan +curantyl +curcumin +cured +curvilinear +cutaneous +cyclase +cycle +cycles +cyclic +cycling +cyclists +cycloheximide +cyclohydrolase +cyclophosphamide +cylinders +cys +cysteine +cysts +cytidine +cytochrome +cytology +cytoplasm +cytoplasmatic +cytoplasmic +cytosine +cytosines +cytosol +cytosolic +d +d'man +d-loop +d-tga +dacarbazine +dai-saiko-to +daidzein +daily +dalton +daltons +dam +danazol +dantrolene +dark +darkness +darkschewitsch +daunomycin +daunorubicin +day +days +daytime +db1 +dba/2j +dbd-pz-nh2 +de-activated +dead +deafness +death +deaths +debrisoquin +debrisoquine +decapitated +decayed +decidua +decrease +decreases +decreasing +deep +defect +defection +defective +defibrotide +deficiency +deficient +deficit +deficits +defined +definite +deflation +degenerated +degradation +degrees +dehydrated +dehydration +dehydroepiandrosterone +dehydrogenase +deiodinase +delay +delayed +deletion +delirium +delivery +delta +deltamethrin +deltoides +demented +dementia +denatured +denervated +denervation +denmark +dense +densities +density +dentavax +dentin +deoxy +depolarizing +deposition +deprenyl +depressed +depression +deprivation +deprived +depth +dermatographism +dermis +dermographism +dermorphin +descending +desflurane +desiccation-selected +desmin +desmopressin +desmosine +desmosome +desogestrel +desynchronized +detail +detection +detergent +determinant +deurenberg +deuterated +deuterium +development +deviation +dexamethasone +dextran +dextro +dextromethorphan +dextropropoxyphene +dextrorotatory +dextrose +dhofari +diabetes +diabetic +diabetics +diagnosis +dialysance +dialysate +dialysis +dialyzable +dialyzed +diameter +diameters +diamond +diaphorase +diaphragm +diarrhea +diastole +diastolic +diazepam +diazinon +diazoxide +dibekacin +dibenzyline +dichloromethane +dichlorvos +diclofenac +died +diestrous +diestrus +diet +diet, +difference +differences +differentiated +diffuse +diffusion +diffusivities +diffusivity +diflunisal +digestibility +digital +digits +digora +digoxin +dihydrotestosterone +dihydrouridine +dilantin +diltiazem +diluent +dilute +dilution +dimension +dimensional +dimensions +dimer +dimeric +dimers +dimethoate +dimorphic +diopter +diopters +dioptre +dioptres +diphenhydramine +diphtheria +diploid +dipolar +dipyridamole +direct +directional +directly +disability +disc +discomfort +discontinuous +discrepancy +discriminability +discrimination +disease +disequilibrium +disgnathous +disinhibition +disobutamide +disopyramide +disorganized +dispersalloy +dispersed +dispersion +displacement +dissimilarity +dissipation +dissociative +distal +distance +distances +distensibility +distraction +distribution +disulfiram +dithranol +diuretic +diuretics +diurnal +divergence +divergent +diversity +division +dna +dobutamine +doca +docetaxel +dog +dolichol +domain +domains +dominance +dominant +donor +donors +dopamine +dopexamine +doppler +dormant +dorsal +dorsalis +dorset +dose +doses +double +down +doxazosin +doxorubicin +doxycycline +dp +drag +drainage +draining +dried +drink +drinkers +drive +driver +drivers +droperidol +drosophila +drowsiness +drug +drugs +dry +dual-marked +duarte +duodenum +duration +durations +during +duroc +dutasteride +dwarf +dye +dyes +dynamic +dyract +dysfunction +dysplasia +dyspnea +dysthymia +dystrophic +e +e(2) +e(t) +e-wave +early +east +eastern +ebony +eccentricity +ecdysone +ecgonine +echo +echocardiogram +echocardiographic +echocardiography +echovirus +eclampsia +ecori +ectostriatum +eczema +edatrexate +edema +edta +education +effect +effective +effectiveness +effector +effectors +effects +efficacies +efficacy +efficiencies +efficiency +efflux +effort +egg +eggen +eicosanoids +either +ejaculation +ejection +elastance +elastase +elastic +elasticity +elastin +elderly +elective +electric +electrical +electrolyte +electrolytes +electron +electronic +eledoisin +element +elements +elevated +elevation +elimination +elite +ellipticine +elliptocytosis +elongation +ema +embolic +embryo +embryogenic +embryonal +embryonic +emission +emotional +emotionality +emphysema +empty +enalapril +enamel +enantioenriched +enantiomeric +enantiopure +enantioselectivity +encainide +encephalitis +encoding +end +endocrinopathy +endometriosis +endometrium +endosulfan +endothelial +endothelin +endothelioid +endothelium +endotoxin +endralazine +endurance +endurance-only +endurance-trained +energies +energy +enflurane +english +enhancement +enhancer +enkephalin +enoxaparin +enoximone +enprostil +enriched +enrichment +entacapone, +enterobacteriaceae +enterobactin +entity +entropy +entry +envelop +envelope +environment +environmental +environments +enzyme +eosinophil +eosinophils +ependymomas +ephedrine +epidemic +epidermal +epidermis-type +epididymal +epididymis +epidoxorubicin +epidural +epilepsy +epimerase +epimerization +epinephrine +epiphysis +epirubicin +epitestosterone +epithelial +epithelioid +epithelium +epq-r +epsilometer +epsilon +equatorial +equine +equipment +equivalents +erect +ergometry +ergotamine +erosive +error +errors +erysipelas +erythema +erythroagglutinating +erythrocyte +erythrocytes +erythrocytic +erythroid +erythromycin +erythropoiesis +erythropoietic +erythropoietin +escharectomy +escherichia +esmolol +esophageal +esophagitis +essential +ester +esterified +estimates +estimation +estradiol +estradiol-17beta +estrodiol +estrogen +estrogen-treated +estrogenic +estrogenized +estrogens +estrone +estrous +estrus +etched +ethambutol +ethanol +ethanol-containing +ethanol-exposed +ethanol-fed +ethanolamine +ether +ethidium +ethmozine +ethyl +ethylbenzene +ethylene +etidocaine +etidronate +etilefrine +etintidine +etiocholanolone +etoh +etomidate +etoposide +euchromatic +eudragit +eugenol +eugenol, +euglycemia +euglycemic +european +europeans +euthanasia +euthyroid +evaluative +evaporation +evening +evenness +evoked +exacerbations +exam +examined, +excellent +excimer +excitation +excitatory +excited +exclusive +exclusively +excretion +execution +exercise +exercise-trained +exercised +exfoliative +exhaustion +exit +exoplasmic +expectancy +expected +expenditure +experienced +experimental +experimenter +experiments +expiration +expiratory +exponential +exposed +exposure +exposures +expressed +expressiveness +extended +extensibility +extension +extensive +extensor +extent +external +extinction +extracellular +extract +extracted +extraction +extractions +extracts +extranodal +extraversion +extraversion-introversion +extroversion +eye +f +f/sigma +f2000 +face +facial +facilitating +factor +factor" +factors +factors; +failing +failure +failures +fair +fall +false +familial +familiar +famotidine +farnesol +fast +fast-twitch +fasted +fasting +fat +fat-rich +fatal +father +fatigue +favourable +feathers +fecal +feces +fecundity +fed +feeding +felodipine +female +females +feminine +femininity +feminization +femoral +femur +fenfluramine +fenofibrate +fenoldopam +fenoterol +fentanyl +fermented +ferning +ferritin +ferromagnetic +ferryl +fertility +fertilization +fertilized +fertilizer +fetal +fetus +fetuses +fever +fi- +fiber +fibers +fibre +fibres +fibrin +fibrinogen +fibroblast +fibroblast-like +fibroblastic +fibroblasts +fibronectin +fibrosis +field +filamentous +filaments +filled +fillets +film +filtering +filtration +fimbrial +final +finally +finally, +finasteride +finger +finger-like +finishers +finnsheep +first +fischer +fish +fistula +fistulotomy +fit +fitness +flagellar +flat +flattened +flavonoids +flaxseed +flecainide +flexed-tail +flexical +flexion +flexor +flight +floating +flobufen +flow +flow-rate +flowers +flows +fluconazole +fluctuation +fludarabine +fludrocortisone +fluid +flumazenil +flunisolide +flunitrazepam +flunixin +fluorescein +fluorescence +fluorescent +fluoridated +fluoride +fluoride-resistant +fluorides +fluorine +fluoro +fluorophore +fluorouracil +fluoxetine +flurbiprofen +fluroshield +flutamide +flutter +fluttering-spiracle +fluvastatin +flux +foam +foetal +foil +folacin-deficient +folate +folded +follicles +follicular +for +force +forces +forget +forked +form +formaldehyde +formaldehyde-treated +formalin +formalin-fixed +formamide +formoterol +formula +formulation +fornix +forskolin +forssman +forward +fosinopril +fotemustine +foveal +fpd +fraction +fractionated +fractions +fracture +fragment +frame +frames +france +francs +free +freeze-branded +french +frequencies +frequency +frequent +frequently/always +fresh +freundlich +friesian +frizzle +front +frontal +fructosamine +fructose +fruit +frusemide +fsh +full +function +function, +functional +functioning +fundamental +fundus +furanose +furazolidone +furosemide +fusion +fusogenic +g +g-csf +g-protein +g-proteins +g/ml +gain +galactomannan +galactose +gallopamil +gamma +gaps +garlic +garnet +gas +gaseous +gastric +gastrin +gastrin-expressing +gastrin-producing +gastrocnemius +gastrointestinal +gastroschisis +gastrostomy +gastrozepin +gauche +gauge +gauss +gaussian +gauze +gavage +gel +gelatin +gelbvieh +gelsolin +gemcitabine +gemfibrozil +gender +gene +general +generalizability +generation +generations +genes +genetic +genistein +genital +genogroup +genomic +genotoxic +genotype +gentamicin +gentamycin +gentisate +gerbil +german +germany +germinating +germination +germline +gestation +gestodene +giemsa +gingivitis +ginsenosides +girls +girth +glandular +glass +glaucoma +gliadin +glial +glibenclamide +globular +globulin +glomerular +glomeruli +glu27 +glucagon +glucocorticoid +glucocorticoids +gluconate +glucosamine +glucose +glucosyl +glucuronide +glucuronides +glutamate +glutaraldehyde +glutathione +gly +gly16 +glyburide +glycated +glycemia +glycerol +glycine +glyco- +glycogen +glycogen-binding +glycoprotein +glycoproteins +glycopyrrolate +glycosylated +glycyrrhizin +glyoxal +gold +golgi +good +gossypol +grade +gradient +gradients +grading +graft +grain +gram +grams +granisetron +granular +granules +granulocyte +granulocytes +granulocytic +granulosa +grass +gravimetric +gravitational +gravity +grazing +greece +green +grey +grip +grooming +gross +ground +group +groups +groups; +grower +growing +growth +gtp-binding +gtpase +guaiacyl +guanethidine +guanfacine +guanidine +guanidinium +guanine +guanine-binding +guanine-nucleotide-binding +guanines +guanosine +guanosines +guanylate +guide +guluronic +gum +gvhd +gymnasts +gyroid +h +h2o2 +habituation +haemagglutinin +haematocrit +haemoglobin +haemolysis +hair +hairless +hairy +half-life +half-lives +halofantrine +halofuginone +haloperidol +halothane +hampshire +hamster +hamstring +hamstrings +hancock +handled +handling +haplotype +haptoglobin +harasima +hardened +hardness +harlan +harman +harmonic +harnesses +hartley +harvey +hatched +hatching +hay +hcl +hcv-1a +head +healing +healthy +hearing +heart +heart-type +hearts +heat +heated +heating +heavily +heavy +heavy-chain +heel +heifers +height +heights +hektoen +helical +helicase +helicobacter +helix +helper +hemagglutinin +hematocrit +hematoxylin +hematuria +hemisphere +hemispheres +hemispheric +hemodialysis +hemoglobin +hemolysis +hemophan +hemorrhage +hemorrhagic +heparin +hepatic +hepatocytes +heptane +herculite +hereford +heritability +heroin +herring +hertfordshire +hetastarch +heterochromatic +heterogeneous +heteronomous +heterophil +heterophils +heterosis +heterozygosities +heterozygosity +heterozygotes +hexagonal +hexamethylmelamine +hexanucleotide +hibernating +high +high- +high-affinity +high-grade +high-yield +high-yielding +higher +highest +highly +hilltop +hinge +hip +hippocampal +hippocampus +his +hispanic +hispanics +histamine +histidine +histidines +histocompatability +histocompatibility +histology +histone +histones +hoagland +hodgkin +hodgkin's +hoechst +hoffman +hoffmann +holland +holstein +home +homogenate +homogeneity +homogeneous +hooded +hopelessness +hopping +horizontal +hormonal +hormone +horse +hospital +hospitals +host +hostility +hot-iron-branded +houghton +hour +hours +hubbard +hue +hulls +human +humans +humerus +humidity +humoral +hyaluronidase +hybrid +hydralazine +hydrated +hydration +hydrochloride +hydrochlorothiazide +hydrocortisone +hydrogen +hydrolysate +hydromorphone +hydrophilic +hydrophobic +hydroxyurea +hydroxyzine +hyperactive +hypercholesterolemic +hyperglycemia +hyperglycemic +hypericin +hyperlipemic +hyperlipidemic +hyperoxia +hyperoxic +hyperplasia +hyperplasias +hyperpolarizing +hyperstimulated +hypertension +hypertensive +hypertensives +hyperthermia +hyperthyroid +hyperthyroidism +hypertonic +hypertrophic +hypertrophied +hypertrophy +hyperventilation +hyphal +hypocapnia +hypochromic +hypokinesis +hypophysectomized +hypophysectomy +hypopituitarism +hypopneas +hypotension +hypothalamic +hypothalamus +hypothermia +hypothyroid +hypothyroidism +hypoxanthine +hypoxemia +hypoxia +hypoxic +hypsarrhythmia +hysterectomized +hz +i +i.e. +i/sigma +ibuprofen +ichthyosis +icodextrin +icosahedral +identification +identified: +identities +identity +idiopathic +ifosfamide +ikeda +ileum +illuminance +image +imaging +imidazoline +imipramine +immature +immediately +immobilized +immune +immuno +immunoassay +immunoassayable +immunological +immunoreactive +immunoreactivity +impaired +impairment +impedance +implant +implantation +implanted +implantology +implants +implications: +impoverished +impression +improvement +impulsivity +in +in: +inactivated +inactivation +inactivator +inactive +inattentive +incentives +incidence +incisor +incisors +inclined +include +include: +included: +includes: +including +increases +increasing +indanestrol +indapamide +independent +indeterminate +index +indexes +indian +indians +indicate: +indices +indirect +individual +individually +individuals +indole +indomethacin +indoor +induced +inducer +inducible +ineffectiveness +inertance +inertia +inertial +infants +infarct +infarcted +infarction +infected +infection +infections +infectious +inferior +infiltrative +inflammation +influx +information +information: +infraspinatus +infusion +ingestion +inguinal +inhalation +inhibin +inhibited +inhibition +inhibitor +inhibitors +inhibitory +inion +initial +initiation +initiator +injection +injured +inner +innervated +innovated +inosine +inositol +input +inserted +insertion +inside +inspection +inspiration +inspiratory +institutionalized +instrumentality +instruments +insufficiency +insulation +insulin +insulinaemia +insulinemia +insurance +intact +intake +integral +intensities +intensity +intensive +interaction +intercalated +intercept +interchange +interest +interest: +interference +intermediate +intermittent +internal +international +internists +internship +interrupted +interstitial +interval +intervening +intervention +interview +intestinal +intestine +intima +intimal +intracellular +intromissions +intron +inulin +invariant +invasive +inversion +investigated: +involved +involved: +involves: +iodide +iodine +ion +ionomycin +ions +iopamidol +iota +ipratropium +ipsilateral +irbesartan +iron +irradiance +irradiances +irradiated +irradiation +irregular +irrigated +irrigation +irritant +irritants +is +is: +ischaemia +ischaemic +ischemia +ischemic +isethionate +iso-treated +isoflurane +isoform +isolated +isoleucine +isometric +isoniazid +isonymy +isoprenaline +isoproterenol +isotropic +issues: +isthmic +isthmus +italy +jacket +japanese +jejunal +jejunostomy +jejunum +jersey +jet +joggers +joining +joint +jointless +josamycin +joule +joules +jugular +juice +junction +junctional +juvenile +juxtamembrane +k +k(+) +k+ +kaempferol +kainate +kallidin +kallikrein +kanamycin +kanzo +kaolinite +kappa +karagouniko +kassinin +keloid +keloids +kelvin +keratin +keratinizing +keratinocytes +keratins +keratitis +keratometry +ketac-endo +ketamine +ketanserin +keto +ketoconazole +ketoprofen +ketorolac +ketotifen +kidney +kidneys +killer +kilodalton +kilodaltons +kindergarten +kinetic +kinetochores +kinetoplast +kininogen +kirschner +kirsten +klebsiella +knee +knees +know +knowledge +known +komarov +koniocellular +kooliner +korotkoff +kringle +kringles +kubicek +kulchitsky +kupffer +kurtosis +kwashiorkor +l-692,429 +l-ala-d-glu +l-leucine +l-name +l929 +labetalol +laboratory +labour +lacidipine +lactalbumin +lactase +lactate +lactating +lactation +lactitol +lactobacilli +lactobacillus +lactose +lactulose +lacunes +lambda +lamella +lamellae +lamellar +lamina +laminin +lamprey +landrace +langendorff +langmuir +language +lansoprazole +laparoscopic +laparoscopy +laparotomy +lard +large +larger +larynx +lasalocid +laser +lasting +latamoxef +late +latencies +latency +latent +lateral +lateralis +latum +layer +layers +laying +lead +leader +leaflets +lean +learning +leaves +lecithin +left +left-handed +leg +leghorn +legionella +legs +leiomyoma +leishmania +length +lengths +lepromatous +leptin +lesion +lesional +lesioned +lesions +less-soluble +let +lethal +leu +leu-m1 +leucine +leucocyte +leucovorin +leukemia +leukocyte +levamisole +levcromakalim +level +levels +levo +levobupivacaine +levofloxacin +levorotatory +lewis +lewisite +li +liable +lidocaine +lie +life +ligand +ligands +ligation +light +light-chain +light-cured +lighter +lightness +lignin +likelihood +limb +limbal +limited +limousin +lincomycin +lincosamide +line +linear +linearity +lingual +linker +linoleate +linoleic +lipase +lipid +lipids +lipocalin-type +lipofectin +lipopolysaccharide +liposomal +liposome +liposomes +lipreading +liquid +liquiritigenin +lire +lisinopril +lisuride +liter +liters +litter +little +live +liver +liver-type +load +loaded +loading +lobe +local +location +locus +log +london +long +long- +long-chain +long-lasting +long-lived +long-styled +long-wave +long-wavelength +long-wavelength-selective +long-wavelength-sensitive +longer +longevity +longissimus +longitudinal +loop +loops +loratadine +lorazepam +losartan +loss +loss" +lovastatin +low +low- +low-affinity +low-grade +low-yield +lower +lumbar +lumen +luminal +luminance +luminol +luminosity +lumpectomy +lung +lungs +lupin +luteal +lutein +luteolin +lymph +lympho- +lymphocyte +lymphocytes +lymphoid +lymphoma +lysine +lysis +lysosomal +lysozyme +m +m-5041t +m412 +mackay +macrolide +macrophage +macrophages +macula +madison +magnesium +magnetization +magnification +magnitude +magno +magno- +magnocellular +main +maintain +maintenance +maize +major +malabsorption +malathion +male +males +malignant +malnourished +malnutrition +maltodextrin +maltose +mammal +mammalian +mammography +mandible +mania +manics +mannan +mannitol +mannose +mannozym +mannuronic +manual +maori +marcus +margarine +margin +marginal +marijuana +marker +markers +marrow +marrow-dependent +masculine +masculinity +masked +mass +masses +masseter +mastectomy +mastoid +match +matching +maternal +matrix +maturation +maturational +mature +mauthner +maximal +maximum +meal +mean +meaning +meaningfulness +means +measles +measured +measures +meat +mecamylamine +mechanical +meclofenamate +media +medial +medialis +median +mediastinal +mediastinum +medical +medium +medium- +medium-sized +medium-wavelength +medium-wavelength-sensitive +medulla +medullary +mefloquine +megakaryocyte +megakaryocytes +meiotic +meishan +melanin +melanocytes +melanoid +melanoma +melatonin +melittin +meloxicam +melphalan +membrane +membrane-associated +membrane-binding +membrane-bound +membraneous +membranes +membranous +memory +men +meningiomas +meningitis +menses +menstrual +menstruation +meperidine +mepivacaine +merino +meropenem +mesencephalon +mesenchyma +mesenchyme +mesenteric +mesial +mesodermal +mesometrial +mesophyll +mesor +mesothelial +mesothelioma +messenger +mesterolone +mestranol +met +metabolic +metabolism +metabolite +metabolites +metabolized +metacentric +metacentrics +metacyclic +metal +metallic +metallicolous +metals +metanephrine +metaphase +metaproterenol +metarhodopsin +metastases +metastasis +metastatic +meters +metestrus +metformin +methacholine +methadone +methamphetamine +methanol +methicillin +methionine +methohexitone +methomyl +methotrexate +methoxamine +methoxychlor +methoxyflurane +methyl +methylanthranilate +methylase +methylated +methyldopa +methylergometrine +methylisobutylxanthine +methylprednisolone +methysergide +metoclopramide +metoprolol +metronidazole +metyrapone +mexiletine +mg +mg/kg +mg/m2 +mice +microalbuminuria +microelectrode +microfold +microg/kg +micromagnets +micronutrients +micropyle +microsomal +microsomes +microsporidia +microvillous +mid +mid-myocardial +mid-portion +mid-sized +mid-styled +midazolam +middle +middle- +middle-aged +middle-envelope +middle-size +middle-sized +middle-wave +middle-wave-sensitive +middle-wavelength +middle-wavelength-sensitive +midlobular +midmyocardial +midmyocardium +migraine +migrants +migration +mild +milk +milking +milkofix +million +millions +milrinone +min +mineral +miniature +minimal +minocycline +minor +minoxidil +minute +minutes +miokamycin +mirex +mirror-image +miscellaneous +misoprostol +missing +misty +mite +mitochondria +mitochondrial +mitomycin +mitoses +mitosis +mitotic +mitoxantrone +mitral +mivacurium +mixed +mixture +mj +ml(-1) +mm +mmol/l +mobility +modafinil +model +moderate +moderately +modification +modified +modifier +modulation +modulator +moduli +modulus +moisture +mol/l +molal +molality +molar +molars +molecule +moles +mollicutes +moloney +molsidomine +molt +moment +moments +monensin +monetite +money +mongrel +moniliformin +monitoring +mono- +monoamine +monoclinic +monoclonal +monocrotaline +monocyte +monocytes +monomer +monomeric +monomers +mononuclear +monophasic +montelukast +month +months +montmorillonite +more-soluble +morning +morphine +morphogenetic +morphology +mortality +morula +morulae +mosaic +mother +motilin +motor +mounts +mouse +mouth +movement +moving +mu +mucin +mucinous +mucoid +mucosa +mucosal +mucosal- +multiparous +multiple +murine +muscarine +muscarinic +muscle +muscle-specific +muscle-type +muscles +muscular +music +mutagenic +mutant +mutants +mutated +mutation +mutation, +mutations +mutator +muzolimine +myasthenia +mycelial +mycelium +mycobacterium +mycoplasma +mycorrhizal +myelin +myeloid +myocarditis +myocardium +myoglobin +myometrial +myometrium +myopathy +myosin +myricetin +mystus +n +n-terminal +n-terminus +nabilone +nacl +nadolol +nafcillin +naive +naked +naloxone +naltrexone +nandrolone +naphthalene +naproxen +narrow +nasal +nasion +nasopharynx +native +native-like +natural +nausea +nebivolol +nebuchamber +neck +necrosis +nefazodone +negative +negentropy +neglect +neighborhood +neither +nematic +neomycin +neonatal +neopterin +neopterins +neostigmine +neostriatum +neoxanthin +nephrectomized +nephrectomy +nephropathy +nerve +nerves +nervousness +netilmicin +network +neural +neuraminidase +neurinomas +neuroblast +neuroblastic +neuroblastoma +neurologic +neuronal +neurons +neuropathy +neuropeptide +neurotensin +neuroticism +neutral +neutralisation +neutralization +neutralizing +neutron +neutrons +neutrophil +neutrophils +newborn +newborns +newton +newtons +nh2-terminal +niacin +nicardipine +nicholas +nicorandil +nicotine +nicotinic +nifedipine +night +nighttime +nimesulide +nimodipine +nipple +nisin +nisoldipine +nitazoxanide +nitrate +nitrendipine +nitrification +nitrite +nitro +nitrogen +nitrogenous +nitroglycerin +nitroimidazole +nitroprusside +nitroxide +nizatidine +no +nociceptive +nocturnal +nodal +node +nodes +nodular +nodule +noise +non-arthritic +non-blood-fed +non-cf +non-sclerotic +non-stimulated +non-tumor +non-users +nonatopic +none +nonlepromatous +nonlinear +nonlinearity +nonreinforced +nonreward +nonsmokers +nonsynonymous +nontemplated +noradrenaline +norepinephrine +norfloxacin +norgestomet +normal +normalized +normally +normals +normoalbuminuria +normocapnia +normokinesis +normolipidemic +normospermic +normotension +normotensive +normotensives +normotonic +normoxia +normoxic +north +northern +norverapamil +noscapine +not +notch +nourished +novaron +novel +november +novice +noxious +nuclear +nuclei +nucleo- +nucleobase +nucleocapsid +nucleocaspid +nucleophile +nucleoprotein +nucleotide +nucleotide-binding +nucleotides +nucleus +null +nulliparous +number +numbers +nurses +nursing +nutrient +nutrition +nymphs +nystatin +nzb/blnj +o +oats +obese +obesity +object +objective +oblique +observed +observer +obstruction +obstructive +occipital +occlusion +occupation +octahedral +odor +of +ofloxacin +often +oil +olaquindox +old +older +oleate +oleic +olein +oleoyl +oligodendroglioma +oligodendrogliomas +oligosaccharides +omeprazole +oncovin +ondansetron +only +oophorectomy +opaque +open +opening +openness +operator +opposite +oral +orally +orange +orbit +orchardgrass +orchidectomized +orchiectomy +organic +organomegaly +orientation +orientational +orlistat +orosomucoid +oroxylin-a +orthorhombic +orthostatism +ossimi +osteocalcin +other +ouabain +outcome +outcomes +outdoor +outer +output +outside +ovalbumin +ovariectomized +ovariectomy +ovary +overload +ovine +oxaliplatin +oxatomide +oxidase +oxidation +oxidized +oxmetidine +oxygen +oxytetracycline +oxytocin +p +p-aminobenzamidine +p-hydroxyphenyl +pace +pachytene +pacing +pacinian +paclitaxel +paddles +pain +pair +pair-fed +paired +palatal +palindromic +palm +palmitic +palmitoyl +palpation +pamidronate +pancreas +pancreatectomy +pancreatic +pancuronium +pantethine +pantoprazole +papain +papaverine +paper +papilla +paracetamol +parallel +parameter +parameters +paraplegia +paraplegic +parasitaemia +parasympathetic +parathion +parathyroid +parenchyma +parenchymal +parent +parental +paretic +pargyline +parietal +parity +parkes +parkinsonism +paromomycin +parotid +partial +particulate +parvo +parvocellular +passage +passages +passive +pastoralists +patch +patency +pathogenic +pathologic +pathological +pathologically +pathology +pathways +patient +patients +pattern +patterns +peak +peaked +peat +peats +pectin +pediatric +pedicled +pefloxacin +pellet +pellets +pelvis +penetrating +penetration +penicillamine +penicillin +penicylinders +pentagastrin +pentazocine +pentobarbital +pentoses +pentoxifylline +peplomycin +peptide +peptides +peptidyl +peptidyl-trna +percentage +percentile +percussion +perforating +perforin +performance +perfusion +pericellular +pericytes +perimeter +perindopril +perinuclear +period +periodontitis +periosteal +peripheral +periportal +peritoneal +peritoneum +peritonitis +periventricular +permanent +permeabilities +permeability +permeation +permer +permixon +peroneal +peroxidase +peroxide +peroxy +perpendicular +persistence +persistent +persisting +personal +pertussis +ph +phagocytosis +pharmacy +pharyngeal +pharynx +phase +phases +phenanthrene +phenol +phenotype +phentolamine +phenylbutazone +phenylephrine +phenytoin +pheochromocytoma +phlebotomy +phlorizin +pholcodine +phosphate +phosphate-binding +phosphates +phospho +phospho- +phospholipid +phosphoprotein +phosphoproteins +phosphoramidon +phosphorous +phosphorus +phosphorus-31 +phosphorylated +phosphorylation +photosensitivity +photosynthesis +phoxim +phthalazine +physical +physicians +pi +pi(hs) +pietrain +pig +pill +pilocarpine +pimozide +pinacidil +pindolol +pinhole +pink +pink-eyed +pinzgauer +piperacillin +piracetam +pirenzepine +piroxicam +pitch +pituitary +placebo +placebo-controlled +placebos +placental +plagiocephaly +plain +plant +plantaris +plants +plaque +plasma +plasmacytoid +plasmapheresis +plasmid +plasmin +plasminogen +plastic +plateau +platelet +platelet-type +platelets +platinum +pleomorphic +plication +plosive +plus +pmoles +pneumonia +pneumoperitoneum +point +poise +poland +polar +polarity +polarization +polarizations +poleward +polished +pollock +polluted +poly +polyclonal +polydispersity +polymer +polymerase +polymerase-associated +polymeric +polymorphic +polymorphism +polyneuropathy +polypay +polypeptide +polypeptides +polyphenols +polyps +polyunsaturated +polyvinylpyrrolidone +pons +pontine +pony +poor +population +populations +porcelain +porcine +pore +pore-forming +pork +porosity +porphyrin +portsmouth +position +positions +positive +post +post- +post-natal +posterior +posteriorly +posthatching +postnatal +postnatally +postpartum +postural +potassium +potato +potency +potential +power +ppm +practice +pravastatin +prazosin +pre-existing +pre-pubertal +precipitation +precision +precursor +precursors +predictability +predicted +prednisolone +prednisone +predominant +preeclampsia +preference +preferred +preferring +pregesterone +pregnancies +pregnancy +pregnant +pregnenolone +preload +premature +prenalterol +prenylamine +preparation +preperitoneal +prepuberal +presence +present +pressor +pressure +pressure-loading +pressure-sensitive +pressures +preterm +prevalence +preventable +primary +primer +priming +primiparous +primitive +principal +pristinamycin +pro +pro-oestrus +probabilities +probability +probe +probenecid +problem +probucol +procainamide +procaine +procarbazine +procedure +procedures +processes +processing +procholeragenoid +product +production +productive +productivity +proestrous +proestrus +profile +prog +progenitor +progesterone +progesterone's +progesterone, +progesterone- +progestin +progestins +progestogen +progestogens +program +progression +progressive +progressor +progressors +proinflammatory +prolactin +proliferating +proliferation +proliferative +proline +promethazine +promoter +promoters +promotion +prone +propacetamol +propafenone +propanediol +properdin +prophylactic +propionate +propofol +proportion +proportional +proportions +propoxyphene +propranolol +prostate +prostatic +protamine +protease +protected +protection +protein +protein-rich +proteins +proteinuria +proteolytic +protocol +protocols +protoplasmic +prototype +protozoa +protruding +protrusion +protrusive +proximal +pseudoanodontia +pseudomonas +pseudorotation +psoralen +psychiatric +psychological +psychoticism +pugh-child +pulmonary +pulsatile +pulse +pulses +pulvinar +pump +puncture +pupae +purified +purine +purinergic +purkinje +push +putamen +putrescine +pyramidal +pyrene +pyridine +pyridostigmine +pyridoxine +pyrimethamine +pyrolysis +pyruvate +q +q10 +quackenbush +quadriceps +quadrivalent +quadrupole +quality +quantitative +quantity +quartile +quartiles +quenchers +quercetin +query +questionnaire +queuine +queuosine +quiescence +quiescent +quiet +quinacrine +quinapril +quinidine +quinine +quinoid +quinoline +quinone +quinones +quintiles +quisqualate +quotient +quotients +rabbit +rabeprazole +rac, +rac-, +racemic, +radial +radiation +radicals +radiculography +radii +radioactive +radioiodinated +radiolabeled +radiotherapy +radius +rads +raffinose +rahmani +raillietina +raloxifene +raman +rambouillet +ramipril +rams +ran +random +randomness +range +ranitidine +rankin +rapid +rarefaction +rarely/never +rat +rate +rates +ratio +ration +ratios +rats +rats, +rauscher +raw +rcc-36, +reabsorption +reacted +reaction +reaction, +reactions +reactive +reactivity +reagent +reagent, +reagents +rear +rearranged +rearrangement +reasoning +rebreathing +recalls +received +recently, +receptive +receptor +receptor-binding +receptor-regulated +receptors +receptors, +recession +recessive +recipient +recipients +reciprocal +recirculation +recognized +recollection +recombinant +recombinase +recovery +rectal +rectum +recurrent +red +red-eye +red-light +reduced +reducing +reductase +reduction +reference +reflectance +reflex +reflexes +reflow +refractile +refraction +refractoriness +refractory +region +regional +regions +regression +regressor +regular +regulator +regulatory +regurgitant +regurgitation +rehabilitation +rehydration +reinforced +reinforcement +rejection +relapse +relapsers +relapsing +related +relatedness +relation +relationship +relative +relatives +relaxation +relaxed +relaxin +relaxing +release +relevant +reliabilities +reliability +remaining +remember +remifentanil +remission +removal +removed +renal +renin +renshaw +reoxygenation +repeat +repeatability +repeated +repeats +reperfused +reperfusion +repetitive +replacement +replicase +repolarization +repression +repressor +reproducibility +reproductive +resection +reserpine +resident +residents +residual +residue +residues +resistance +resistances +resistant +resistive +resistivity +resistor +resolution +resolve +resolved +resonance +respective +respiration +respiratory +responded +responder +responders +responding +response +responses +responsive +rest +rested +resting +restrained +restraint +restricted +restriction +resulting +results: +resuscitation +resveratrol +retard +retardation +retentate +retention +reticular +reticulocytes +retina +retinol +retinular +retraction +retransfusion +retrograde +retroperitoneal +retzius +reverse +reward +rewarded +rhabdomyosarcoma +rhamnose +rheumatism +rhinitis +rhodamine +rhodopsin +rhombohedral +rhombomere +rhombomeres +rhythmic +ribavirin +ribose +ribosomal +ribosomes +rice +ricin +rickard +ridgelands +rifampicin +rifampicine +rifampin +rigevidon +right +right-handed +rigidified +rigidity +rilmenidine +ring +rinn +ripe +risk +ristocetin +ritonavir +rituximab +rna +rocuronium +rodent +roentgen +roentgens +rolipram +romanov +root +ropivacaine +rostral +rotation +rough +roughness +round +roundness +routine +rower +roxatidine +rs14203, +rubella +rudimentary +ruminal +ruminant +run +runners +running +rupture +rural +rutin +ryanodine +rye +s +s-methyl +s-phase +saccade +saccharose +sacrum +sagittal +salbutamol +salicylate +saline +salinity +salinomycin +saliva +salivary +salivary-like +salmeterol +salmon +salt-hypertension-sensitive +salt-sensitive +same +sample +samples +sand +saponins +sarafotoxin +sarcoidosis +sarcomatoid +sardi +sarsaponin +sartorius +satisfactory +saturated +saturation +scale +scao +scar +scatter +scattering +scavenger +schistosoma +schizophrenia +schizophrenics +sclerosis +scmc +scopolamine +score +scores +seated +secobarbital +second +secondary +seconds +secreted +secretin +secretion +secretory +sections +sedation +sedentary +sedimentation +segment +segmental +segments +select +selected +selection +selective +selegiline +selenium +self +self-concept +self-incompatibility +sella +semiconducting +seminoma +seminomas +semitendinosus +senescent +sense +sensibility +sensing +sensitive +sensitivity +sensitized +sensitizer +sensolog +sensory +sentence +separation +sephadex +sepiolite +sepsis +septal +september +septic +septum +sequence +sequence, +sequential +ser +sera +serial +series +series, +serine +serine-rich +serines +serosa +serosal +serotonin +serovar +serres +serum +serum-soluble +services +several +severe +severity +sevoflurane +sex +shade +shaft +shakuyaku +sham +sham-branded +sham-operated +sham-pinealectomized +sharpness +shear +sheep +shell +shift +shock +shocked +short +short- +short-wave +short-wave-sensitive +short-wavelength +short-wavelength-sensitive +shortening +shorter +shorthorn +shortwave-sensitive +shoulder +showed +shunt +siblings +sibutramine +sickle +side +sigmoidicity +signal +significant +silage +silane +silent +silver +silymarin +similarity +similarly, +simmental +simple +simultaneous +simvastatin +since +single +single-stage +singlet +sinus +sinusoids +site +sites +sitting +size +sizes +skeletal +skeletonized +skin +skull +sleep +slices +slides +slight +slope +slopes +slow +slow-resistant +slow-twitch +sludge +sluggish +small +small-surface +smaller +smallest +smoke +smokers +smoking +smooth +snout +social +sodium +sodium-sensitive +softness +soil +solcoseryl +soleus +solid +solitus +solubilities +solubility +soluble +solute +solution +solvent +somatic +somatomedin +somatosensory +somatostatin +somatotropin +somite +somites +sonicare +sorbitol +sotalol +sound +south +southern +sows +soy +soybean +space +spacer +spacings +spain +spatial +specialists +species +specific +specificity +spectinomycin +spectrophotometric +speed +spermidine +spherical +sphingomyelin +sphingosine +spike +spin +spinal +spine +spinnbarkeit +spirometry +spironolactone +spleen +splenectomized +splenic +splenocytes +spontaneous +sporadic +spot +spotting +spray +spread +spreading +spretus +spring +sprint +sprint-trained +sprinters +sprotte +sr48968 +sramek +stabe +stabilization +stable +stacked +stage +staging +standard +standard, +standards +standing +stanozolol +staphylococcus +star +starch +start +starvation +starved +state +static +stationary +stature +stearic +stearylamine +stenosis +stenting +step +stereopreference +sterile +steroid +steroids +stiffness +stilbenedisulfonates +stimulated +stimulating +stimulation +stimuli +stimulus +stoichiometry +stomach +stool +storage +strain +strata +stratum +straw +strength +streptokinase +streptomyces +streptomycin +stress +stressed +stretch +striatal +striatum +strict +stripping +stroke +stroma +stromal +strong +strongly +structure +students +studies, +study +study, +styrene +subcutaneous +subiculum +subject +subjective +subjects +submerged +subordinates +subpopulations +substance +substituted +substitution +substrate +substrate, +substrate-adherent +substrates +subtilisin +subtypes, +subunit +success +successful +succinate +succinylcholine +suckled +suckling +sucralfate +sucrase +sucrose +sufentanil +suffolk +sugar +sulfadiazine +sulfadoxine +sulfamethoxazole +sulfatase +sulfate +sulfated +sulfates +sulfinpyrazone +sulfisoxazole +sulfite +sulfonate +sulfoxide +sulfoxide, +sulfur +sulglicotide +sulodexide +sulphamethoxazole +sulphate +sulphide +sulphur +sulpiride +sum +summation +summer +superconducting +superficial +superior +superlative +supernatant +supernatants +supination +supine +supplement +supplementation +supplemented +supplies +support +suppressor +suprofen +suramin +surface +surfaces +surfactant +surgeon +surgeons +surgery +surgical +survanta +survey +survival +survivors +susceptibility +susceptible +susceptibles +suspect +suspended +suspension +sustained +suture +sweden +swedish +swimmers +swimming +swiss +switch +syk +symmetric +symmetrical +sympathetic +symptom +symptomatic +symptoms +syn +synaptic +synchronization +synchronous +synchrony +syncope +synonymous +synthase +synthesis +synthesized +synthetic +syringyl +sysomicin +system +system, +systemic +systems, +systole +systolic +t +t-cell +tablet +tablets +tachycardia +tacrolimus +tactile +tail +taiwanese +tallow +tamoxifen +tandem +tap +tape +tar +tarentaise +target +targets +targhee +task +tau +taurine +taut +taxol +taxotere +tazobactam +tear +technical +tectum +teeth +tegafur +teicoplanin +telangectasia +telmisartan +telomere +telomeric +telophase +telsa +temperature +temperatures +tempering +template +temporal +tendinous +tendon +teniposide +tense +tensed +tension +tensions +tenue +teratoma +teratozoospermia +terazosin +terbutaline +terfenadine +term +terminal +terminator +terminators +territorial +tertatolol +tesla +test +tester +testerone +testing +testis +testosterone +testosterone's +testosterone, +tetanus +tetracaine +tetracycline +tetragonal +tetrahedral +tetramethylrhodamine +tetranychus +tetraplegic +tetraploid +texas +thalamus +thalidomide +that +the +theca +thelytoky +theophylline +therapy +therapy-related +thermophilic +theta +thiacetazone +thiamin +thiamine +thickness +thicknesses +thigh +thighs +thin +thin-thread +thiolation +thionein +thiopental +thiopentone +thiophene +thiophosphamide +thioridazine +thiotepa +this +thomsen-friedenreich +thoracic +thr +threat +threonine +threshold +thresholds +thrombin +thrombocytopenia +thrombolysis +thrombosis +thrombus +thymic +thymic-dependent +thymic-derived +thymidine +thymine +thymines +thymoma +thymus +thymus- +thymus-dependent +thymus-derived +thymus-processed +thyroid +thyroidectomized +thyroidectomy +thyroxine +tianeptine +tiapamil +tibetans +tibia +tibial +tibolone +ticlopidine +tilapia +tilt +time +time-reversal +timed +times +timolol +tinidazole +tiny +tip +tissue +tissue-type +tissues +titanium +titer +tizoxanide +to +tobacco +tobramycin +tocopherol +tocopherols +tolbutamide +tolerance +tolerant +tolmetin +toluene +tone +tongue +tonic +tonnes +tons +tonsillectomy +top +topotecan +toronto +torpedo +torque +torr +torsion +torsional +total +toth +touch +tourniquet +toxicity +toxoid +tpsic +tq +trabecular +tracheal +traditional +trained +traineeship +training +tramadol +trandolapril +trans +transcribed +transducin +transection +transfer +transferred +transferrin +transformed +transfusion +transgenic +transient +transit +transit-time +transition +transitional +translation +translational +translocation +transmembrane +transmission +transmissivity +transmittance +transpiration +transplant +transplantation +transplanted +transplants +transport +transverse +trastuzumab +trauma +traumatic +trays +treadmill +treated +treatment +treatments +trehalase +trehalose +tremor +trendelenburg +trenimon +tri- +trial +triamcinolone +triamterene +triazolam +triceps +trichophyton +triclinic +tricuspid +tricyclic +triglycerides +trimer +trimethoprim +trimipramine +tripelennamine +triple +triplet +triptolide +trocar +trochanter +troglitazone +tropisetron +trough +true +truncated +trypanosoma +trypsin +tryptamine +tryptase +tryptic +tryptophan +tuberculoid +tubes +tubular +tubule +tubulopathic +tuli +tumor +tumor-bearing +tumorlike +tumorous +tumors +tumour +tumours +turbuhaler +turmeric +turned +tuscany +twin +two +tylosin +tymidine +type +tyr1 +tyramine +tyrosine +tyrosol +tytin +u-0521 +ubiquitinated +ulceration +ulcers +ulinastatin +ulnar +ultrasound +unaffected +uncertain +uncertainty +unchanged +unclassifiable +unclassified +unconjugated +uncontrolled +undernourished +undernutrition +undifferentiated +unfavourable +unfolded +unfractionated +uniformly +unilateral +uninfected +uninvolved +unipolar +unit +united +units +units/ml +unmodified +unrelated +unresolved +unresolved/disorganized +unrestricted +unsaturated +unstimulated +untrained +untreated +up +upper +upright +upstream +uptake +uracil +uraemic +uranium +urapidil +urban +urea +urecholine +uremia +uremic +ureter +urethane +urgency +uridine +uridines +uridylate +urinary +urine +urokinase +users +using +usual +uterine +uterus +utilization +v +v-thread +vacancies +vaccinated +vaccination +vaccine +vacuolar +vacuolar-type +vagal +vagina +vaginal +vaginally +vagotomy +val +valency +validation +valine +valsalva +valsartan +value +valves +vanadate +vanadium +vancomycin +var +variability +variable +variable-region +variance +variant +variants +variation +varicella +varicocele +variety +varnish +vascular +vasculitis +vector +vecuronium +vegetable +vegetarian +vegetarians +vegetative +vehicle +vein +veins +velocities +velocity +venlafaxine +venoles +venom +venous +ventilated +ventilation +ventilatory +ventral +ventricle +ventricles +ventricular +venules +verapamil +veratridine +verb +verbal +vermilion +verprolin-homology +vertebrae +vertebral +vertical +vesicle +vesicles +vesicular +vessels +vestibular +vetch +viability +viable +vibrational +vigor +villi +villin +vimentin +vinblastine +vinclozolin +vincristine +vindesine +vinorelbine +violaxanthin +violet +viral +virchowian +virgin +virion +virulence +virulent +virus +viruses +visceral +viscosity +vision +visits +visual +visual-only +visually +vitality +vitremer +vivarium +vivonex +vmax +voltage +voltages +volts +volume +volumes +volumetric +volunteers +vomiting +vomitoxin +vowel +w(peak) +waist +wake +wakefulness +waking +walking +wall +warfarin +warm +warm-sensitive +warmed +warmth +washed +water +watermelon +watt +watts +wave +weak +weakly +weaned +weanimix +weaning +wedge +wedged +week +weekly +weeks +weighed +weight +weighted +weights +well-differentiated +well-nourished +west +western +wet +wettedness +wheat +wheezing +white +white-eye +white-spotting +whites +whole +whorls +width +wiener +wiktor +wild +wild-type +wilted +wine +winter +wistar +with +withdrawal +wogonin +wollastonite +women +word +work +workload +workshop +wrinkled +wrist +x +x-linked +xanthan +xanthine +xbai +xenopus +xylazine +xylenes +xylitol +xylose +y +year +years +yeast +yeast-like +yeasts +yellow +yellowing +yield +yields +yogurt +yohimbine +yoked +yolk +yorkshire +young +younger +yttrium +z100 +zafirlukast +zearalenone +zeatin +zeaxanthin +zebrafish +zebu +zeolite +zeranol +zeste +zeta +zofenopril +zolpidem +zomepirac +zooplankton +zopiclone +zoster +zucker +zygapophysial +zymodeme +zymodemes +zymosan diff --git a/Library/WordData/stop b/Library/WordData/stop index e9e4e60c1afe3068dad0b6796b75a8e3043569f1..f24b24555d06b11f20ed93c84b6b7a73671eeab5 100644 --- a/Library/WordData/stop +++ b/Library/WordData/stop @@ -1,313 +1,313 @@ -a -about -above -across -after -afterwards -again -against -al -all -almost -alone -along -already -also -although -always -am -among -amongst -an -analyze -and -another -any -anyhow -anyone -anything -anywhere -applicable -apply -are -around -as -assume -at -be -became -because -become -becomes -becoming -been -before -beforehand -being -below -beside -besides -between -beyond -both -but -by -came -cannot -cc -cm -come -compare -could -de -dealing -department -depend -did -discover -dl -do -does -during -each -ec -ed -effected -eg -either -else -elsewhere -enough -et -etc -ever -every -everyone -everything -everywhere -except -find -for -found -from -further -get -give -go -gov -had -has -have -he -hence -her -here -hereafter -hereby -herein -hereupon -hers -herself -him -himself -his -how -however -hr -ie -if -ii -iii -in -inc -incl -indeed -into -investigate -is -it -its -itself -j -jour -journal -just -kg -last -latter -latterly -lb -ld -letter -like -ltd -made -make -many -may -me -meanwhile -mg -might -ml -mm -mo -more -moreover -most -mostly -mr -much -must -my -myself -namely -neither -never -nevertheless -next -no -nobody -noone -nor -not -nothing -now -nowhere -of -off -often -on -only -onto -or -other -others -otherwise -our -ours -ourselves -out -over -own -oz -per -perhaps -pm -precede -presently -previously -pt -rather -regarding -relate -said -same -seem -seemed -seeming -seems -seriously -several -she -should -show -showed -shown -since -so -some -somehow -someone -something -sometime -sometimes -somewhere -still -studied -sub -such -take -tell -th -than -that -the -their -them -themselves -then -thence -there -thereafter -thereby -therefore -therein -thereupon -these -they -this -thorough -those -though -through -throughout -thru -thus -to -together -too -toward -towards -try -type -ug -under -unless -until -up -upon -us -used -using -various -very -via -was -we -were -what -whatever -when -whence -whenever -where -whereafter -whereas -whereby -wherein -whereupon -wherever -whether -which -while -whither -who -whoever -whom -whose -why -will -with -within -without -wk -would -wt -yet -you -your -yours -yourself -yourselves -yr +a +about +above +across +after +afterwards +again +against +al +all +almost +alone +along +already +also +although +always +am +among +amongst +an +analyze +and +another +any +anyhow +anyone +anything +anywhere +applicable +apply +are +around +as +assume +at +be +became +because +become +becomes +becoming +been +before +beforehand +being +below +beside +besides +between +beyond +both +but +by +came +cannot +cc +cm +come +compare +could +de +dealing +department +depend +did +discover +dl +do +does +during +each +ec +ed +effected +eg +either +else +elsewhere +enough +et +etc +ever +every +everyone +everything +everywhere +except +find +for +found +from +further +get +give +go +gov +had +has +have +he +hence +her +here +hereafter +hereby +herein +hereupon +hers +herself +him +himself +his +how +however +hr +ie +if +ii +iii +in +inc +incl +indeed +into +investigate +is +it +its +itself +j +jour +journal +just +kg +last +latter +latterly +lb +ld +letter +like +ltd +made +make +many +may +me +meanwhile +mg +might +ml +mm +mo +more +moreover +most +mostly +mr +much +must +my +myself +namely +neither +never +nevertheless +next +no +nobody +noone +nor +not +nothing +now +nowhere +of +off +often +on +only +onto +or +other +others +otherwise +our +ours +ourselves +out +over +own +oz +per +perhaps +pm +precede +presently +previously +pt +rather +regarding +relate +said +same +seem +seemed +seeming +seems +seriously +several +she +should +show +showed +shown +since +so +some +somehow +someone +something +sometime +sometimes +somewhere +still +studied +sub +such +take +tell +th +than +that +the +their +them +themselves +then +thence +there +thereafter +thereby +therefore +therein +thereupon +these +they +this +thorough +those +though +through +throughout +thru +thus +to +together +too +toward +towards +try +type +ug +under +unless +until +up +upon +us +used +using +various +very +via +was +we +were +what +whatever +when +whence +whenever +where +whereafter +whereas +whereby +wherein +whereupon +wherever +whether +which +while +whither +who +whoever +whom +whose +why +will +with +within +without +wk +would +wt +yet +you +your +yours +yourself +yourselves +yr diff --git a/Library/runn.C b/Library/runn.C index d3d3c89fcc2cd80bb003d738f50eb67e3940be3d..2d029adb39d98b50eceb4ffddc36320c40f08723 100644 --- a/Library/runn.C +++ b/Library/runn.C @@ -1,216 +1,216 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "runn.h" -using namespace std; -namespace iret { - -int mark(int pflag, long ct, int ivl, const char *what){ -if(pflag&&((ct%ivl)==0)){cout << what << " count=" << ct << endl; - return(1);} -else return(0); -} - -int get_qflag(){ - int pflag=1; - ifstream fin("quiet.flag",ios::in); - if(fin.is_open()){ - fin >> pflag; - fin.close(); - fin.clear(); - } - return(pflag); -} - -int get_pathw(char *nam,const char *pfl,const char *pex,const char *ch){ - char cnam[256]; - - strcpy(cnam,"path_"); - strcat(cnam,pfl); - strcat(cnam,"_"); - strcat(cnam,pex); - strcat(cnam,"."); - strcat(cnam,ch); - ifstream fin(cnam,ios::in); - if(!fin.is_open()){ - fin.clear(); - strcpy(cnam,"path_"); - strcat(cnam,pfl); - strcat(cnam,"_"); - strcat(cnam,pex); - fin.open(cnam,ios::in); - if(!fin.is_open()){ - fin.clear(); - strcpy(cnam,"path_"); - strcat(cnam,pfl); - fin.open(cnam,ios::in); - if(!fin.is_open()){ - fin.clear(); - strcpy(cnam,"path"); - fin.open(cnam,ios::in); - if(!fin.is_open()){ - cout << "Path file for type " << pfl - << " does not exist!" << endl; - exit(1); - } - } - } - } - - fin.getline(nam,256); - fin.close(); - strcat(nam,pfl); - strcat(nam,"_"); - strcat(nam,pex); - strcat(nam,"."); - strcat(nam,ch); - return(1); -} - -char *add_num(const char *ptr,long n,char *buf){ - char cnam[100]; - long_str(cnam,n); - strcpy(buf,ptr); - strcat(buf,cnam); - return(buf); -} - -long gseed(int x, char **v, const char *c){ - long seed; - - seed=clnga(x,v,c,"seed for random number generator"); - srandom((unsigned int)seed); - return seed; -} - -long zrand(long p){ - return(((long)random())%p); -} - -void shuffle(long n,long *idx){ - long i,j,k; - for(i=0;i0;i--){ - k=zrand(i+1); - j=idx[i]; - idx[i]=idx[k]; - idx[k]=j; - } -} - -long clnga(int x, char **v, const char *c, const char *name){ - int i,flag=1; - long num; - - for(i=1;i> num; - if(oss.fail()){ - cout << "Enter " << name << ":" << endl; - cin >> num; - } - } - if(flag==1){ - cout << "Enter " << name << ":" << endl; - cin >> num; - cin.get(); - } - return(num); -} - -long rnd(double p) -{ -return((long)floor(p+.5)); -} - -double cdbla(int x, char **v, const char *c, const char *name){ - int i,flag=1; - double num; - - for(i=1;i> num; - if(oss.fail()){ - cout << "Enter " << name << ":" << endl; - cin >> num; - } - } - if(flag==1){ - cout << "Enter " << name << ":" << endl; - cin >> num; - cin.get(); - } - return(num); -} - -char *cstra(int x, char **v, const char *c, const char *name){ - int i; - char cnam[max_str]; - - for(i=1;i> n; -} - -//Function to convert first two char of string to an -//integer. Should be an ASCII null terminated string -int trac(const char *str){ - if(!(*str))return(0); - else { - return((int)(*(str+1))+128*((int)(*str))); - } -} - -} +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "runn.h" +using namespace std; +namespace iret { + +int mark(int pflag, long ct, int ivl, const char *what){ +if(pflag&&((ct%ivl)==0)){cout << what << " count=" << ct << endl; + return(1);} +else return(0); +} + +int get_qflag(){ + int pflag=1; + ifstream fin("quiet.flag",ios::in); + if(fin.is_open()){ + fin >> pflag; + fin.close(); + fin.clear(); + } + return(pflag); +} + +int get_pathw(char *nam,const char *pfl,const char *pex,const char *ch){ + char cnam[256]; + + strcpy(cnam,"path_"); + strcat(cnam,pfl); + strcat(cnam,"_"); + strcat(cnam,pex); + strcat(cnam,"."); + strcat(cnam,ch); + ifstream fin(cnam,ios::in); + if(!fin.is_open()){ + fin.clear(); + strcpy(cnam,"path_"); + strcat(cnam,pfl); + strcat(cnam,"_"); + strcat(cnam,pex); + fin.open(cnam,ios::in); + if(!fin.is_open()){ + fin.clear(); + strcpy(cnam,"path_"); + strcat(cnam,pfl); + fin.open(cnam,ios::in); + if(!fin.is_open()){ + fin.clear(); + strcpy(cnam,"path"); + fin.open(cnam,ios::in); + if(!fin.is_open()){ + cout << "Path file for type " << pfl + << " does not exist!" << endl; + exit(1); + } + } + } + } + + fin.getline(nam,256); + fin.close(); + strcat(nam,pfl); + strcat(nam,"_"); + strcat(nam,pex); + strcat(nam,"."); + strcat(nam,ch); + return(1); +} + +char *add_num(const char *ptr,long n,char *buf){ + char cnam[100]; + long_str(cnam,n); + strcpy(buf,ptr); + strcat(buf,cnam); + return(buf); +} + +long gseed(int x, char **v, const char *c){ + long seed; + + seed=clnga(x,v,c,"seed for random number generator"); + srandom((unsigned int)seed); + return seed; +} + +long zrand(long p){ + return(((long)random())%p); +} + +void shuffle(long n,long *idx){ + long i,j,k; + for(i=0;i0;i--){ + k=zrand(i+1); + j=idx[i]; + idx[i]=idx[k]; + idx[k]=j; + } +} + +long clnga(int x, char **v, const char *c, const char *name){ + int i,flag=1; + long num; + + for(i=1;i> num; + if(oss.fail()){ + cout << "Enter " << name << ":" << endl; + cin >> num; + } + } + if(flag==1){ + cout << "Enter " << name << ":" << endl; + cin >> num; + cin.get(); + } + return(num); +} + +long rnd(double p) +{ +return((long)floor(p+.5)); +} + +double cdbla(int x, char **v, const char *c, const char *name){ + int i,flag=1; + double num; + + for(i=1;i> num; + if(oss.fail()){ + cout << "Enter " << name << ":" << endl; + cin >> num; + } + } + if(flag==1){ + cout << "Enter " << name << ":" << endl; + cin >> num; + cin.get(); + } + return(num); +} + +char *cstra(int x, char **v, const char *c, const char *name){ + int i; + char cnam[max_str]; + + for(i=1;i> n; +} + +//Function to convert first two char of string to an +//integer. Should be an ASCII null terminated string +int trac(const char *str){ + if(!(*str))return(0); + else { + return((int)(*(str+1))+128*((int)(*str))); + } +} + +} diff --git a/Library/runn.h b/Library/runn.h index 94dfd53a6c022e0c850b4db8f4e244975f47933f..299fac0ea37b06aec6e76ac0dee4cd0c0d7146a5 100644 --- a/Library/runn.h +++ b/Library/runn.h @@ -1,392 +1,392 @@ -#ifndef RUNN_H -#define RUNN_H - -#include -#include -#include -#include -#include -using namespace std; -namespace iret { - -const int word_cnt = 5000; //Maximum number of words in a document. -const int word_len = 1500; //Maximum word length. -const long max_str=1500; //Maximum string length. - -int get_pathw(char *cn,const char *dfl,const char *dex,const char *a); - //Reads the path from a file "path_(*dfl)" and constructs the - //file name from as "(*dfl)_(*dex).(*a)". Cats path and file - //name and returns the full info in cn. -char *add_num(const char *ptr,long n,char *buf); //converts long to ascii - //and cats to end of string and returns pointer to new string - //that results. Does not change input string. The new string is - //held in buffer space and this is overwritten at each call. - -int get_qflag(); - //This function gets the value of the print flag pflag that is - //used to control output. -int mark(int,long,int,const char*); - //This function is used to print out information that indicates - //how a function is progressing. It is dependent on the value of - //pflag. -long gseed(int,char**,const char*); - //This function is called to allow the input of a seed value for - //the random number generator. It must be called in main or the - //arguments of main must be passed down to it if it is to allow - //command line entry. Otherwise the first argument may be set to - //zero and it may be used to enter the seed at run time from the - //console. -long clnga(int,char**,const char*,const char*); - //Allows a long to be entered from the console at run time if the - //first argument is set to zero. If the first two arguments are - //the arguments of main, then it allows command line entry with - //the flag that is the third argument and with a statement about - //the input that is the fourth argument. -double cdbla(int,char**,const char*,const char*); -char *cstra(int,char**,const char*,const char*); -long zrand(long); - //Produces a random long integer that is in the range [0,argument). - //Machinery of the random number generator. -void shuffle(long n,long *idx); //Randomly shuffles an array of longs. -void dshuffle(long n,long *idx); //Randomly shuffles an array of longs. - //Improved version suggested by Don Comeau -long rnd(double); - //Rounds off a double and returns the integer that results. - - //Reads in a string including white space and ends the string - //just before the character a. -inline int get_string(char *cnam,ifstream &ifile,char a){ - char *pch = cnam; - long j=1; - - start: - if((*(pch++)=ifile.get())!=EOF){ - if(*(pch-1)==a){pch--;goto start;} - while(((*(pch++)=ifile.get())!=a)&&(j -void sSort(const long ix, X *idx){ - long k, j, ir, i; - X rra; - - if(ix<=1)return; - - k=(ix>>1); - ir=ix-1; - for(;;) { - if(k>0) { - rra=idx[--k]; - } - else { - rra=idx[ir]; - idx[ir] = idx[0]; - if(--ir ==0) { - idx[0]=rra; - return; - } - } - i=k; - j=((k+1)<<1)-1; - while(j<=ir) { - if(j -void sRort(const long ix, X *idx){ - long k, j, ir, i; - X rra; - - if(ix<=1)return; - - k=(ix>>1); - ir=ix-1; - for(;;) { - if(k>0) { - rra=idx[--k]; - } - else { - rra=idx[ir]; - idx[ir] = idx[0]; - if(--ir ==0) { - idx[0]=rra; - return; - } - } - i=k; - j=((k+1)<<1)-1; - while(j<=ir) { - if(jidx[j+1])) ++j; - if(rra>idx[j]) { - idx[i]=idx[j]; - j +=(i=j)+1; - } - else j=ir+1; - } - idx[i]=rra; - } -} - -template -void hSort(const long n, X *ra, Y *rb) { - long k, j, ir, i; - X rra; - Y rrb; - - if(n<=1)return; - - k=(n>>1); - ir=n-1; - for(;;) { - if(k>0) { - rra=ra[--k]; - rrb=rb[k]; - } - else { - rra=ra[ir]; - rrb=rb[ir]; - ra[ir] = ra[0]; - rb[ir] = rb[0]; - if(--ir ==0) { - ra[0]=rra; - rb[0]=rrb; - return; - } - } - i=k; - j=((k+1)<<1)-1; - while(j<=ir) { - if(j -void hSort(const long n, X *ra, Y *rb, Z *rc) { - long k, j, ir, i; - X rra; - Y rrb; - Z rrc; - - if(n<=1)return; - - k=(n>>1); - ir=n-1; - for(;;) { - if(k>0) { - rra=ra[--k]; - rrb=rb[k]; - rrc=rc[k]; - } - else { - rra=ra[ir]; - rrb=rb[ir]; - rrc=rc[ir]; - ra[ir] = ra[0]; - rb[ir] = rb[0]; - rc[ir] = rc[0]; - if(--ir ==0) { - ra[0]=rra; - rb[0]=rrb; - rc[0]=rrc; - return; - } - } - i=k; - j=((k+1)<<1)-1; - while(j<=ir) { - if(j -void hRort(const long n, X *ra, Y *rb) { - long k, j, ir, i; - X rra; - Y rrb; - - if(n<=1)return; - - k=(n>>1); - ir=n-1; - for(;;) { - if(k>0) { - rra=ra[--k]; - rrb=rb[k]; - } - else { - rra=ra[ir]; - rrb=rb[ir]; - ra[ir] = ra[0]; - rb[ir] = rb[0]; - if(--ir ==0) { - ra[0]=rra; - rb[0]=rrb; - return; - } - } - i=k; - j=((k+1)<<1)-1; - while(j<=ir) { - if(j ra[j+1]) ++j; - if(rra>ra[j]) { - ra[i]=ra[j]; - rb[i]=rb[j]; - j +=(i=j)+1; - } - else j=ir+1; - } - ra[i]=rra; - rb[i]=rrb; - } -} - -template -void hRort(const long n, X *ra, Y *rb, Z *rc) { - long k, j, ir, i; - X rra; - Y rrb; - Z rrc; - - if(n<=1)return; - - k=(n>>1); - ir=n-1; - for(;;) { - if(k>0) { - rra=ra[--k]; - rrb=rb[k]; - rrc=rc[k]; - } - else { - rra=ra[ir]; - rrb=rb[ir]; - rrc=rc[ir]; - ra[ir] = ra[0]; - rb[ir] = rb[0]; - rc[ir] = rc[0]; - if(--ir ==0) { - ra[0]=rra; - rb[0]=rrb; - rc[0]=rrc; - return; - } - } - i=k; - j=((k+1)<<1)-1; - while(j<=ir) { - if(j ra[j+1]) ++j; - if(rra>ra[j]) { - ra[i]=ra[j]; - rb[i]=rb[j]; - rc[i]=rc[j]; - j +=(i=j)+1; - } - else j=ir+1; - } - ra[i]=rra; - rb[i]=rrb; - rc[i]=rrc; - } -} - - -//Function to convert a long to a null terminated string. -void long_str(char *cnam,long n); - -//Function to convert a string with null termination -//to a long. -void str_long(char *cnam,long &n); - -//Function to convert first two char of string to an -//integer. Should be an ASCII null terminated string -int trac(const char *str); - -template -void xshuffle(Y n,Z *idx){ //Randomly shuffles an array of longs. - Y i,k; - Z u; - for(i=n-1;i>0;i--){ - k=(Y)zrand((long)i+1); - u=idx[i]; - idx[i]=idx[k]; - idx[k]=u; - } -} - -template -void dxhuffle(long n,Z *idx){ //Randomly shuffles an array type Z*. - long i,k; - Z xx; - for(i=n-1;i>0;i--){ - k=zrand(i+1); - xx=idx[i]; - idx[i]=idx[k]; - idx[k]=xx; - } -} - -} -#endif +#ifndef RUNN_H +#define RUNN_H + +#include +#include +#include +#include +#include +using namespace std; +namespace iret { + +const int word_cnt = 5000; //Maximum number of words in a document. +const int word_len = 1500; //Maximum word length. +const long max_str=1500; //Maximum string length. + +int get_pathw(char *cn,const char *dfl,const char *dex,const char *a); + //Reads the path from a file "path_(*dfl)" and constructs the + //file name from as "(*dfl)_(*dex).(*a)". Cats path and file + //name and returns the full info in cn. +char *add_num(const char *ptr,long n,char *buf); //converts long to ascii + //and cats to end of string and returns pointer to new string + //that results. Does not change input string. The new string is + //held in buffer space and this is overwritten at each call. + +int get_qflag(); + //This function gets the value of the print flag pflag that is + //used to control output. +int mark(int,long,int,const char*); + //This function is used to print out information that indicates + //how a function is progressing. It is dependent on the value of + //pflag. +long gseed(int,char**,const char*); + //This function is called to allow the input of a seed value for + //the random number generator. It must be called in main or the + //arguments of main must be passed down to it if it is to allow + //command line entry. Otherwise the first argument may be set to + //zero and it may be used to enter the seed at run time from the + //console. +long clnga(int,char**,const char*,const char*); + //Allows a long to be entered from the console at run time if the + //first argument is set to zero. If the first two arguments are + //the arguments of main, then it allows command line entry with + //the flag that is the third argument and with a statement about + //the input that is the fourth argument. +double cdbla(int,char**,const char*,const char*); +char *cstra(int,char**,const char*,const char*); +long zrand(long); + //Produces a random long integer that is in the range [0,argument). + //Machinery of the random number generator. +void shuffle(long n,long *idx); //Randomly shuffles an array of longs. +void dshuffle(long n,long *idx); //Randomly shuffles an array of longs. + //Improved version suggested by Don Comeau +long rnd(double); + //Rounds off a double and returns the integer that results. + + //Reads in a string including white space and ends the string + //just before the character a. +inline int get_string(char *cnam,ifstream &ifile,char a){ + char *pch = cnam; + long j=1; + + start: + if((*(pch++)=ifile.get())!=EOF){ + if(*(pch-1)==a){pch--;goto start;} + while(((*(pch++)=ifile.get())!=a)&&(j +void sSort(const long ix, X *idx){ + long k, j, ir, i; + X rra; + + if(ix<=1)return; + + k=(ix>>1); + ir=ix-1; + for(;;) { + if(k>0) { + rra=idx[--k]; + } + else { + rra=idx[ir]; + idx[ir] = idx[0]; + if(--ir ==0) { + idx[0]=rra; + return; + } + } + i=k; + j=((k+1)<<1)-1; + while(j<=ir) { + if(j +void sRort(const long ix, X *idx){ + long k, j, ir, i; + X rra; + + if(ix<=1)return; + + k=(ix>>1); + ir=ix-1; + for(;;) { + if(k>0) { + rra=idx[--k]; + } + else { + rra=idx[ir]; + idx[ir] = idx[0]; + if(--ir ==0) { + idx[0]=rra; + return; + } + } + i=k; + j=((k+1)<<1)-1; + while(j<=ir) { + if(jidx[j+1])) ++j; + if(rra>idx[j]) { + idx[i]=idx[j]; + j +=(i=j)+1; + } + else j=ir+1; + } + idx[i]=rra; + } +} + +template +void hSort(const long n, X *ra, Y *rb) { + long k, j, ir, i; + X rra; + Y rrb; + + if(n<=1)return; + + k=(n>>1); + ir=n-1; + for(;;) { + if(k>0) { + rra=ra[--k]; + rrb=rb[k]; + } + else { + rra=ra[ir]; + rrb=rb[ir]; + ra[ir] = ra[0]; + rb[ir] = rb[0]; + if(--ir ==0) { + ra[0]=rra; + rb[0]=rrb; + return; + } + } + i=k; + j=((k+1)<<1)-1; + while(j<=ir) { + if(j +void hSort(const long n, X *ra, Y *rb, Z *rc) { + long k, j, ir, i; + X rra; + Y rrb; + Z rrc; + + if(n<=1)return; + + k=(n>>1); + ir=n-1; + for(;;) { + if(k>0) { + rra=ra[--k]; + rrb=rb[k]; + rrc=rc[k]; + } + else { + rra=ra[ir]; + rrb=rb[ir]; + rrc=rc[ir]; + ra[ir] = ra[0]; + rb[ir] = rb[0]; + rc[ir] = rc[0]; + if(--ir ==0) { + ra[0]=rra; + rb[0]=rrb; + rc[0]=rrc; + return; + } + } + i=k; + j=((k+1)<<1)-1; + while(j<=ir) { + if(j +void hRort(const long n, X *ra, Y *rb) { + long k, j, ir, i; + X rra; + Y rrb; + + if(n<=1)return; + + k=(n>>1); + ir=n-1; + for(;;) { + if(k>0) { + rra=ra[--k]; + rrb=rb[k]; + } + else { + rra=ra[ir]; + rrb=rb[ir]; + ra[ir] = ra[0]; + rb[ir] = rb[0]; + if(--ir ==0) { + ra[0]=rra; + rb[0]=rrb; + return; + } + } + i=k; + j=((k+1)<<1)-1; + while(j<=ir) { + if(j ra[j+1]) ++j; + if(rra>ra[j]) { + ra[i]=ra[j]; + rb[i]=rb[j]; + j +=(i=j)+1; + } + else j=ir+1; + } + ra[i]=rra; + rb[i]=rrb; + } +} + +template +void hRort(const long n, X *ra, Y *rb, Z *rc) { + long k, j, ir, i; + X rra; + Y rrb; + Z rrc; + + if(n<=1)return; + + k=(n>>1); + ir=n-1; + for(;;) { + if(k>0) { + rra=ra[--k]; + rrb=rb[k]; + rrc=rc[k]; + } + else { + rra=ra[ir]; + rrb=rb[ir]; + rrc=rc[ir]; + ra[ir] = ra[0]; + rb[ir] = rb[0]; + rc[ir] = rc[0]; + if(--ir ==0) { + ra[0]=rra; + rb[0]=rrb; + rc[0]=rrc; + return; + } + } + i=k; + j=((k+1)<<1)-1; + while(j<=ir) { + if(j ra[j+1]) ++j; + if(rra>ra[j]) { + ra[i]=ra[j]; + rb[i]=rb[j]; + rc[i]=rc[j]; + j +=(i=j)+1; + } + else j=ir+1; + } + ra[i]=rra; + rb[i]=rrb; + rc[i]=rrc; + } +} + + +//Function to convert a long to a null terminated string. +void long_str(char *cnam,long n); + +//Function to convert a string with null termination +//to a long. +void str_long(char *cnam,long &n); + +//Function to convert first two char of string to an +//integer. Should be an ASCII null terminated string +int trac(const char *str); + +template +void xshuffle(Y n,Z *idx){ //Randomly shuffles an array of longs. + Y i,k; + Z u; + for(i=n-1;i>0;i--){ + k=(Y)zrand((long)i+1); + u=idx[i]; + idx[i]=idx[k]; + idx[k]=u; + } +} + +template +void dxhuffle(long n,Z *idx){ //Randomly shuffles an array type Z*. + long i,k; + Z xx; + for(i=n-1;i>0;i--){ + k=zrand(i+1); + xx=idx[i]; + idx[i]=idx[k]; + idx[k]=xx; + } +} + +} +#endif diff --git a/gnorm_trained_models/BiomedNLP-PubMedBERT-base-uncased-abstract/version_vocab/vocab1.txt b/gnorm_trained_models/BiomedNLP-PubMedBERT-base-uncased-abstract/version_vocab/vocab1.txt index 3003962c8ccb573394c76ba6f54f787421be83db..5deeecd692e1a75d02d464a96685dbbf59ba8908 100644 --- a/gnorm_trained_models/BiomedNLP-PubMedBERT-base-uncased-abstract/version_vocab/vocab1.txt +++ b/gnorm_trained_models/BiomedNLP-PubMedBERT-base-uncased-abstract/version_vocab/vocab1.txt @@ -1,28895 +1,3 @@ -[PAD] -[UNK] -[CLS] -[SEP] -[MASK] -! -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -@ -[ -\ -] -^ -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -{ -| -} -~ -¡ -¢ -£ -¤ -¥ -¦ -§ -¨ -© -ª -« -¬ -® -¯ -° -± -² -³ -´ -µ -¶ -· -¸ -¹ -º -» -¼ -½ -¾ -¿ -× -ß -æ -ð -÷ -ø -þ -đ -ħ -ı -ĸ -ł -ŋ -œ -ƅ -ƈ -ƍ -ƒ -ƙ -ƛ -ƞ -ƭ -ƴ -ƶ -ƿ -ǀ -ǁ -ǂ -ǝ -ȣ -ȥ -ȵ -ȶ -ȼ -ɐ -ɑ -ɒ -ɓ -ɔ -ɕ -ɖ -ə -ɚ -ɛ -ɝ -ɡ -ɣ -ɤ -ɥ -ɨ -ɩ -ɪ -ɬ -ɭ -ɮ -ɯ -ɲ -ɳ -ɵ -ɷ -ɸ -ɹ -ɻ -ɾ -ɿ -ʀ -ʁ -ʂ -ʃ -ʅ -ʈ -ʉ -ʊ -ʋ -ʌ -ʎ -ʐ -ʑ -ʒ -ʔ -ʕ -ʘ -ʝ -ʟ -ʦ -ʧ -ʰ -ʱ -ʲ -ʷ -ʹ -ʺ -ʼ -ʾ -ˁ -˂ -˃ -˄ -ˆ -ˇ -ˉ -ː -ˑ -˖ -˘ -˙ -˚ -˜ -˝ -˞ -˟ -ˠ -ˤ -˪ -˭ -ˮ -˴ -ͳ -ͻ -΀ -΁ -΂ -΃ -΄ -΍ -α -β -γ -δ -ε -ζ -η -θ -ι -κ -λ -μ -ν -ξ -ο -π -ρ -ς -σ -τ -υ -φ -χ -ψ -ω -ϐ -ϑ -ϒ -ϕ -ϖ -ϝ -ϫ -ϭ -ϰ -ϱ -ϲ -ϵ -ϻ -ϼ -а -б -в -г -д -е -ж -з -и -к -л -м -н -о -п -р -с -т -у -ф -х -ц -ч -ш -щ -ь -э -ю -я -є -ѕ -і -ј -ћ -ѱ -ѳ -ѵ -ґ -қ -ҝ -ҡ -ҫ -ү -ұ -ҳ -һ -ӏ -ӕ -ә -ө -ӽ -ԏ -ԑ -՚ -־ -א -י -ץ -ר -ש -׳ -״ -، -ا -ة -ت -ح -خ -د -ر -ز -ش -ط -ع -ـ -ف -ل -م -و -٠ -١ -٢ -٤ -٪ -٭ -ۥ -ߚ -ߝ -ࣈ -क -च -ण -फ -र -ल -ा -० -ก -ข -ง -ต -ท -น -บ -พ -ฟ -ภ -ย -ร -ล -ว -ศ -ห -อ -ะ -า -฿ -แ -ใ -༌ -ခ -ᄀ -ᄁ -ᄂ -ᄃ -ᄄ -ᄅ -ᄆ -ᄇ -ᄉ -ᄋ -ᄌ -ᄎ -ᄏ -ᄐ -ᄑ -ᄒ -ᅟ -ᅡ -ᅢ -ᅣ -ᅥ -ᅦ -ᅧ -ᅨ -ᅩ -ᅪ -ᅬ -ᅭ -ᅮ -ᅯ -ᅰ -ᅱ -ᅲ -ᅳ -ᅴ -ᅵ -ᆨ -ᆩ -ᆪ -ᆫ -ᆭ -ᆯ -ᆲ -ᆷ -ᆸ -ᆺ -ᆻ -ᆼ -ᆾ -ᇀ -ᇂ -ᇞ -᛫ -ᴂ -ᴅ -ᴋ -ᴍ -ᴏ -ᴐ -ᴓ -ᴨ -ᴪ -ᴳ -ᴼ -ᵒ -ᵗ -ᵝ -ᵧ -ᵪ -ᵯ -ᵹ -ᶲ -᾽ -᾿ -῾ -‑ -‒ -— -― -‖ -‘ -’ -‚ -‛ -“ -” -„ -‟ -† -‡ -• -‥ -… -‧ -‰ -‱ -′ -″ -‴ -‹ -› -※ -‾ -‿ -⁁ -⁃ -⁄ -⁎ -⁓ -⁗ -⁰ -ⁱ -⁴ -⁵ -⁶ -⁷ -⁸ -⁹ -⁺ -⁻ -⁽ -⁾ -ⁿ -₀ -₁ -₂ -₃ -₄ -₅ -₆ -₇ -₈ -₉ -₋ -₌ -ₐ -ₓ -₣ -₤ -₦ -₩ -€ -₱ -₵ -₹ -₺ -ℂ -℃ -℅ -ℇ -ℋ -ℏ -ℐ -ℑ -ℒ -ℓ -ℕ -№ -℗ -ℙ -ℛ -ℜ -ℝ -℞ -℠ -™ -ℤ -℧ -ℬ -ℰ -ℱ -ℳ -ℴ -ℵ -ℽ -ⅅ -⅓ -⅔ -⅗ -⅙ -⅚ -⅛ -⅜ -ⅰ -ⅱ -ⅲ -ⅳ -ⅴ -ⅵ -ⅶ -ⅷ -ⅸ -ⅹ -ⅺ -ⅻ -← -↑ -→ -↓ -↔ -↕ -↗ -↘ -↙ -↝ -↦ -↷ -↼ -⇀ -⇄ -⇆ -⇋ -⇌ -⇐ -⇑ -⇒ -⇓ -⇔ -⇝ -⇨ -∀ -∂ -∅ -∆ -∇ -∈ -∊ -∋ -∎ -∏ -∐ -∑ -− -∓ -∕ -∖ -∗ -∘ -∙ -√ -∛ -∝ -∞ -∟ -∠ -∡ -∢ -∣ -∥ -∧ -∨ -∩ -∪ -∫ -∴ -∶ -∷ -∸ -∼ -∽ -∾ -≂ -≃ -≅ -≈ -≊ -≌ -≍ -≏ -≐ -≑ -≒ -≔ -≙ -≡ -≣ -≤ -≥ -≦ -≧ -≨ -≪ -≫ -≲ -≳ -≺ -≻ -≽ -≿ -⊂ -⊃ -⊆ -⊕ -⊖ -⊗ -⊘ -⊙ -⊞ -⊟ -⊠ -⊣ -⊤ -⊥ -⊿ -⋄ -⋅ -⋆ -⋊ -⋘ -⋙ -⋜ -⋝ -⋮ -⋯ -⌀ -⌈ -⌉ -⌊ -⌋ -⌜ -⌝ -⌢ -⌣ -⍴ -⍵ -⍺ -⎕ -⎼ -␣ -① -② -③ -④ -⑤ -⑥ -⑦ -⑧ -⑨ -⑩ -⑴ -⑵ -⑶ -ⓒ -ⓝ -ⓡ -─ -│ -├ -┤ -┬ -┴ -═ -║ -╪ -╳ -█ -░ -▒ -▓ -■ -□ -▪ -▫ -▬ -▯ -▲ -△ -▴ -▵ -▶ -▸ -▹ -► -▼ -▽ -▾ -▿ -◆ -◇ -◊ -○ -● -◦ -◻ -◽ -★ -☆ -☉ -☐ -☓ -☺ -♀ -♂ -♢ -♦ -♪ -♭ -♯ -✓ -✕ -✜ -✧ -✴ -✶ -➁ -➔ -➝ -➢ -➤ -⟂ -⟦ -⟧ -⟨ -⟩ -⟵ -⟶ -⦁ -⦵ -⧧ -⧸ -⧹ -⨉ -⨪ -⨯ -⩒ -⩼ -⩽ -⩾ -⩿ -⪅ -⪆ -⪕ -⪖ -⪝ -⪡ -⪢ -⫽ -⬄ -⬜ -⬡ -⬢ -⬰ -⬽ -ⱪ -⸱ -⿿ -、 -。 -〈 -〉 -《 -》 -「 -」 -【 -】 -〓 -〔 -〕 -〖 -〗 -〜 -〝 -〟 -ア -カ -ヒ -メ -リ -レ -・ -ㅣ -ㆍ -㎂ -㎍ -㎖ -㎛ -㎝ -㎟ -㎡ -㎶ -㒐 -丸 -参 -囊 -大 -射 -方 -气 -汤 -注 -消 -液 -清 -潜 -瘀 -益 -祛 -粒 -肝 -肾 -胶 -芪 -补 -颗 -饮 -骨 -꞉ -꞊ -ꞌ -ꞵ -ff -fi -fl -ffi -ffl -ſt -﴾ -﴿ -︰ -﹒ -﹛ -﹢ -﹣ -﹤ -﹥ -﹩ -$ -% -& -' -( -) -* -+ -, -- -. -: -; -< -= -> -? -[ -] -^ -_ -c -i -m -p -v -{ -| -} -~ -、 -・ -ア -オ -タ -モ -ᅲ -£ -¬ -¥ -₩ - -𝐑 -𝐟 -𝐫 -𝐴 -𝐸 -𝑃 -𝑐 -𝑑 -𝑒 -𝑓 -𝑖 -𝑛 -𝑜 -𝑟 -𝑡 -𝑥 -𝒆 -𝒙 -𝒞 -𝒟 -𝒦 -𝒩 -𝒪 -𝒫 -𝒮 -𝒯 -𝓟 -𝓣 -𝔇 -𝔐 -𝔹 -𝕊 -𝕜 -𝖱 -𝘗 -𝛂 -𝛆 -𝛼 -𝛽 -𝜀 -𝜃 -𝜅 -𝜇 -𝜋 -𝜌 -𝜒 -𝜖 -𝜗 -𝝁 -##y -##q -##e -##0 -##5 -##3 -##8 -##a -##g -##p -##c -##t -##4 -##6 -##7 -##2 -##h -##d -##i -##n -##o -##s -##r -##w -##u -##k -##b -##f -##l -##m -##z -##v -##1 -##9 -##⋮ -##° -##x -##j -##≈ -##₃ -##∙ -##λ -##⁻ -##μ -##ø -##∼ -##→ -##δ -##× -##™ -##∕ -##≫ -##β -##± -##₂ -##₆ -##⩽ -##€ -##® -##∓ -##α -##с -##γ -##£ -##≡ -##⋯ -##π -##═ -##⋆ -##ρ -##µ -##σ -##₅ -##₄ -##˚ -##ᅴ -##ᄇ -##ᅡ -##ᆼ -##ᄋ -##ᅲ -##ᄌ -##ᅱ -##₋ -##fi -##´ -##₇ -##ε -##ß -##+ -##η -##© -##б -##³ -##е -##ı -##□ -##∗ -##∶ -##⁄ -##− -##ϕ -##⊂ -##˙ -##º -##∆ -##↓ -##κ -##∞ -##⁺ -##₈ -##fl -##ι -##⁸ -##∷ -##⋅ -##² -##⁰ -##æ -##↔ -##₁ -##ζ -##τ -##÷ -##= -##∘ -##¹ -##⁷ -##⁶ -##χ -##ω -##ϒ -##ℏ -##ˆ -##ф -##о -##↑ -##⩾ -##φ -##¬ -##ϵ -##υ -##₉ -##θ -##ψ -##─ -##├ -##≪ -##ł -##♂ -##ℓ -##➔ -##ⅱ -##⁴ -##≳ -##ν -##~ -##√ -##≲ -##< -##к -##⁹ -##⊃ -##н -##∝ -##₀ -##⁵ -##¯ -##← -##ff -##΄ -##а -##ə -##◦ -##≃ -##≅ -##œ -##ɛ -##⧹ -##⍺ -##ⅰ -##ɑ -##ο -##⇒ -##♀ -##˂ -##○ -##▴ -##> -##⊗ -##р -##в -##ж -##℃ -##∈ -##∧ -##⊙ -##¢ -##м -##ᅭ -##ᅵ -##ᆫ -##ᅦ -##∑ -##у -##з -##л -##і -##≦ -##≧ -##ˮ -##≒ -##﹥ -##≊ -##¼ -##△ -##ᄅ -##ᅩ -##ᅥ -##ᆨ -##đ -##⊥ -##т -##ⅳ -##˃ -##ƒ -##ξ -##│ -##¾ -##ⅲ -##ð -##ϲ -##ⓒ -##␣ -##┴ -##∥ -##▪ -##⧧ -##𝛽 -##➝ -##ɣ -##∇ -##ш -##ᶲ -##и -##г -##♦ -##∫ -##∩ -##½ -##ſt -##⇋ -##ⅷ -##˜ -##⧸ -##¨ -##⨯ -##∣ -##ª -##∨ -##ĸ -##● -##▬ -##¥ -##ⅴ -##⇑ -##⇓ -##⇆ -##⬢ -##∏ -##ɕ -##℅ -##х -##⇌ -##ː -##∪ -##⊕ -##ǀ -##˖ -##ɒ -##⬜ -##ᆯ -##ᆸ -##д -##𝐑 -##⇀ -##˝ -##≐ -##þ -##↷ -##ɹ -##ʃ -##ƞ -##⟶ -##⇄ -##┤ -##ц -##я -##ߚ -##¸ -##п -##∴ -##⇔ -##№ -##ʱ -##↦ -##ͳ -##⪅ -##▵ -##⇝ -##ɚ -##ᆺ -##ɐ -##ɪ -##② -##ҡ -##⊠ -##∂ -##ꞌ -##≍ -##ˇ -##▒ -##ю -##☓ -##∖ -##ˉ -##┬ -##⎼ -##ₓ -##ς -##〓 -##¦ -##ᵒ -##⋘ -##ⁱ -##ᆷ -##ᅳ -##⅓ -##㎍ -##■ -##♯ -##΀ -##≿ -##ү -##ᄉ -##ϭ -##˞ -##ɸ -##∅ -##⪢ -##↝ -##ᴂ -##𝛆 -##ᵝ -##˄ -##ᵧ -##ᅪ -##ᄒ -##ᄀ -##ᅧ -##ᄃ -##∊ -##ᅟ -##¤ -##☉ -##ʰ -##ⓡ -##ᄆ -##ϐ -##ר -##א -##ש -##י -##| -##▓ -##ч -##ь -##ᄑ -##ᆩ -##ᄂ -##ˑ -##⊖ -##⦁ -##ℑ -##ℇ -##ⅺ -##⋜ -##᾿ -##ᅢ -##¥ -##║ -##є -##ₐ -##ᅮ -##ʼ -##қ -##ҫ -##ฟ -##น -##𝒯 -##⊟ -##฿ -##ɔ -##ʁ -##⋙ -##ʌ -##ᅬ -##ⅶ -##⪡ -##ϰ -##ᆾ -##⊘ -##ـ -##ᇞ -##▸ -##✕ -##⋄ -##ℤ -##ㅣ -##ffi -##↼ -##อ -##ง -##ᵗ -##ⅸ -##ɖ -##ffl -##ʊ -##⿿ -##╳ -##﹢ -##΍ -##▿ -##ˁ -##˭ -##≺ -##₣ -##ϖ -##↕ -##⬰ -##ᄏ -##⬡ -##⪆ -##↘ -##ᅰ -##₵ -##า -##ใ -##ต -##บ -##⊿ -##𝒞 -##ℒ -##ɳ -##ѱ -##ل -##ƛ -##ϱ -##⨉ -##𝑖 -##𝑓 -##𝑒 -##𝑟 -##𝑛 -##𝑡 -##𝑜 -##ว -##⋝ -##₌ -##ᄐ -##ℝ -##ŋ -##ᆻ -##ก -##∽ -##ѕ -##ӏ -##٢ -##᾽ -##ħ -##⬽ -##ɡ -##ㆍ -##^ -##ɩ -##ᇂ -##℧ -##ⅻ -##ǁ -##ɷ -##ͻ -##⌀ -##𝑐 -##ѵ -##ɤ -##㎛ -##˘ -##ʒ -##꞊ -##ा -##क -##𝜀 -##ᴏ -##ⅵ -##¬ -##タ -##モ -##ℱ -##ᴼ -##ア -##ʲ -##ј -##د -##م -##ة -##♭ -##ᴋ -##΃ -##₺ -##℗ -##꞉ -##ʹ -##ᴐ -##ز -##ع -##ر -##و -##ᆭ -##↗ -##﹤ -##ᅨ -##ƴ -##𝑑 -##メ -##リ -##カ -##≽ -##≣ -##ʾ -##ᄎ -##ᅯ -##΁ -##⊆ -##ƙ -##ʂ -##ℕ -##ش -##ا -##ف -##⟂ -##≏ -##ᆲ -##ᆪ -##ʉ -##オ -##℠ -##m -##ƭ -##ल -##⊣ -##ˤ -##ᄄ -##һ -##∀ -##ᅣ -##░ -##ɾ -##∟ -##ꞵ -##≔ -##١ -##ت -##✓ -##ʘ -##▼ -##ɮ -##i -##v -##ʐ -##٠ -##≑ -##₦ -##ヒ -##ǂ -##𝜃 -##℞ -##⌢ -##ϑ -##ߝ -##ɵ -##⩿ -##ȥ -##ⱪ -##⅔ -##ȼ -##レ -##ⁿ -##˴ -##𝒆 -##♪ -##☆ -##⨪ -##𝓣 -##█ -##٤ -##⩒ -##ɬ -##ƈ -##ᴍ -##⌣ -##𝜅 -##ʷ -##③ -##≤ -##ћ -##∠ -##э -##ѳ -##p -##ℳ -##◊ -##र -##ण -##𝐟 -##∎ -##⇐ -##ʎ -##☺ -##⟵ -##ℜ -##㎖ -##ˠ -##แ -##ล -##ะ -##ข -##ภ -##พ -##ย -##ร -##ท -##ศ -##ʺ -##ⅹ -##⫽ -##① -##∋ -##ۥ -##ℂ -##≻ -##ᴪ -##ǝ -##ᄁ -##΂ -##ȣ -##𝜋 -##ɓ -##ɯ -##ᴓ -##ө -##⑩ -##ℵ -##₩ -##★ -##𝛼 -##ᇀ -##④ -##ti -th -##er -##on -##en -##es -##ed -##in -the -##al -in -an -##or -of -##at -##an -##ro -##tion -and -##as -##it -##ic -##ar -##is -##ent -##ec -##re -##el -##ing -to -##ation -##ul -##et -##ol -##om -##ac -##ur -##os -##ith -##us -##ve -##id -##ati -with -##ly -##le -con -st -##th -##ere -##ter -##ig -pro -for -re -##ts -##uc -##od -##il -##em -as -##if -was -##ated -##un -##ess -##am -##ce -##im -##tr -##ow -ex -is -res -##um -##oc -were -##op -##ut -##tic -##ity -##ir -##ion -on -##ents -ac -com -##ate -that -##ab -##ot -##og -al -be -##ys -ch -##ud -##ev -##ag -##ell -by -##ad -##ain -##ap -##rom -wh -##ical -##ect -##ant -or -pr -##ers -##ib -su -##pl -##ine -##ment -pati -##ase -##iv -##ep -im -##tin -tr -##ff -dis -at -##igh -ad -us -we -##tiv -inc -this -##ph -comp -patients -##ence -are -##qu -sp -en -from -##ure -cell -stud -##tive -##iz -##ay -##ign -##ial -sh -un -##ific -ne -ind -##te -met -##ies -##ch -##ations -per -##fer -##per -##ress -rec -##ous -##ort -##ip -##orm -##ans -ass -##ens -ph -eff -##ear -##evel -##ore -resul -##rou -##ge -##ons -##oun -cl -##ia -##ely -gen -##est -ap -##act -##ting -sign -##bs -##enti -inf -##ular -##ary -##ition -##yp -dif -##oth -##olog -par -bet -##erm -high -pre -inter -##ese -rel -##ased -can -##cl -cont -tre -##ver -not -pl -results -ha -##ors -co -de -##ding -imp -##yl -these -meth -ev -study -##ym -resp -car -exp -##ich -pos -##roup -##ist -me -##st -##der -differ -str -##enc -trans -##ance -##ose -incre -##ative -di -group -signific -##ath -am -##een -##duc -anal -##ection -sur -##ied -significant -cells -reg -mod -##and -##ast -##ong -##ual -det -af -rep -spec -pres -##iti -cons -activ -all -method -which -##ween -##able -have -##age -between -sc -inv -show -##vi -##rol -mic -##all -##ory -##00 -##oci -ab -int -##end -##ide -after -ar -##ression -##oll -##ound -cor -##ater -treat -##atin -he -##ros -than -no -##ach -##ved -##usion -##ug -compar -##ects -tw -##ally -##ari -##ected -##tein -##one -se -##alu -##yst -##ther -level -anti -##ight -##yn -##ates -##unc -med -app -ag -##ever -##ases -##ech -##cr -associ -##ost -##ive -treatment -##ak -##ective -##our -using -##form -it -##tim -le -##ility -bl -##ome -but -##ox -has -been -protein -##entr -two -##unction -des -dur -##ted -##na -control -analys -##ulation -el -ob -##ood -##ized -bi -##ating -##ard -##evelop -##ue -more -low -dec -develop -hy -may -##mun -##ang -##anc -both -##osis -##dition -##so -mon -sub -clin -##ree -ca -year -ser -##action -dise - -dat -fl -their -red -##ass -##yt -##hib -##cess -also -tim -dep -##gg -##pt -##ill -##ob -##acter -##ri -##ures -syst -used -concl -##ub -##cer -##uced -during -##ectiv -there -id -##ined -##ological -##erap -had -##agn -##iss -function -##ism -fact -meas -dr -##uld -system -respons -##ogen -man -##ew -vari -gl -clinical -obs -data -em -##br -##out -suc -incl -##gen -##ond -heal -##ution -##atic -es -how -methods -out -associated -##pp -under -sim -##urr -present -form -##plic -##ack -analysis -sugg -sam -over -acc -##aph -##etic -##ium -one -model -conclusion -includ -ris -foll -health -typ -##uct -##ult -increased -produc -use -##inding -activity -compared -significantly -pol -##ays -disease -non -conc -up -syn -##ained -inhib -expression -levels -ins -obser -##gr -##ven -other -end -found -##ace -##ole -##ization -follow -time -##ici -qu -##entif -##ild -oc -##eth -##ite -##ential -perform -##itive -immun -20 -##ogr -##und -risk -fr -exper -##tig -##ane -dem -evalu -gr -determ -cr -##ants -pot -##ail -different -studies -effect -br -hum -identif -effects -showed -##ments -indic -##ile -its -based -exam -##atory -##ish -appro -##tal -prov -##row -##estig -therap -investig -char -partic -concentr -##oid -##als -however -tum -sm -path -ma -report -min -sel -##mon -##ery -##ities -most -##ences -##adi -specific - -infl -##ectively -diagn -##onstr -##ma -assess -##ple -contr -rem -##esis -##ency -ep -when -##ination -our -years -##av -subs -##idence -##etr -##tern -def -op -mul -supp -##ormal -acid -age -##ren -into -decre -##ability -only -##ange -well -higher -intr -##ograph -##tical -##oph -three - -##erg -##ork -cancer -##iqu -##ently -test -##ions -##ited -peri -##ology -##ber -##inal -pred -gener -mal -human -##echan -##ept -induced -##ature -##arg -##ement -such -##af -mol -new -##ible -impro -observed -character -import -num -sec -struct -mechan -##til -##ord -groups -##ix -##ik -patient -##ov -##ugh -##rel -fir -sens -demonstr -child -##rop -occ -col -##res -ox -chang -loc -##tid -##ron -##gh -rate -##aining -blood -kn -##lex -cases -##ute -post -related -##use -surg -process -##cop -##ark -tiss -micro -##ok -##ured -term -rat -who -##tit -fe -first -##view -##oma -fur -suggest -valu -aff -##uction -gene -response -mem -disc -addition -##amm -##echn -##ural -cal -po -##rough -neu -##ough -##dro -type -##ten -##osph -inj -those -grow -##och -development -##osed -##own -techn -##round -##ior -among -sol -##ful -respectively -##its -##ulated -condition -conf -##ines -mean -care -cd -##vention -##ental -molec -recept -##ucle - -##ital -##les -month -drug -factors -del -through -mg -##ains -##arly -pop -total -hyp -ps -ol -##omen -changes -il -##fore -increase -prim -cyt -bas -##ize -correl -role -invol -00 -potential -##di -##ivid -obj -##endent -prop -long -prob -experim -##ould -phys -##ational -##asc -##tained -plas -subj -##xim -fre -normal -gre -##crib -##urs -finding -my -##earch -medi -mus -important -##ived -stim -back -adm -##eng -lower -main -##vers -predic -##roph -##omy -could -simil -##led -mechanism - -##over -##istic -##ha -ii -##ex -##ek -within -sym -hist -##ds -##cin -performed -##gan -infection -so -##air -where -##eter -##ody -sequ -number -##ene -comb -frequ -##osp -lik -extr -sever -hyper -therapy -95 -review -##ms -conclusions -prog -##ption -##ps -outc -less -coll -##ider -while -multi -##ade -flu -##cle -mm -cap -each -##ether -children -requ -antib -hydro -bec - -treated -reported -##iver -lim -including -##val -similar -##ages -period -poss -vir -growth -tumor -do -fib -some -complex -aim -##orph -ext -##ores -##ock -women -##vious -##omes -positive -##arge -radi -pur -describ -pat -reve -##ne -200 -13 -dna -findings -##ric -##otic -cy -chem -factor -##ptom -prom -did -without -##ival -ret -30 -inhibit -##pha -week -mo -cardi -##ividual -mut -work -##ples -prot -##ground -tem -er -case -##zym -four -##iel -abs -##tivity -mice -they -##ily -individual -##ues -consider -dir -background -25 -##istr -##abol -tissue -calc -18 -days -approach -pe -av -##ascular -##ativ -surf -##ness -months -previous -vit -##ise -need -curr -##ale -##otyp -isol -##oper -phosph -identified -common -sing -measure -##ically -reveal -##ty -hem -maj -##ake -##iter -sev -major -vis -##oh -##ross -beha -synth -techniqu -##equ -organ -nucle -design -caus -##fl -##are -presence -research -##plet -behavi -analy -second -provid -mark -##ality -differences -##ocyt -##med -##ified -further -##uss -surv -mater -vol -symptom -reduced -##atal -small -16 -metabol -24 -obtained -##ced -binding -hosp -class -serum -dose -##ning -influ -##plications -rats -##atis -##crip -cult -mat -##par -initi -evidence -##ymph -due -single -early -##ography -##ice -und -sk -day -lip -expl -inform -admin -##acy -about -species -##ety -##lish -##ung -##ties -enzym -##ft -decreased -sl -##ately -conditions -05 -receptor -##itis -bre -primary -alth -##ension -lymph -50 -although -os -subjects -##hip -##esting -##ogn -qual -diagnosis -enh -lab -ele -measured -##ites -concentration -##old -##ink -ml -rati -particip -##trib -##int -arter -##ins -ci -##ressed -proteins -studied -neg -##roscop -concentrations -fem -population -##ectr -##oles -##las -pers -##ither -##ests -resist -improve -100 -alpha -mr -lif -rece -hep -should -beta -##ication -range -##ateg -brain -pattern -genes -developed -investigated -##ials -##ych -objective -samples -##ength -##gf -plasma -line -press -lead -set -phen -membr -if -ec -following -##erv -interaction -revealed -##vir -##emia -##erc -##amic -aut -##aging -reaction -demonstrated -##arget -17 -der -discuss -analyz -coun -dependent -target -##plant -anim -surgery -ang -prol -##ening -##onal -vs -several -chron -surface -##eptid -##ulin -est -fam -relations -##hy -inflamm -##tan -##por -##osure -oper -##ind -gluc -##ield -##aneous -large -proper -molecular -hospital -stand -##roduc -elect -kg -adv -##ick -les -here -examined -##asing -complet -##cent -##oss -survival -appear -weight -prof -functional -chrom -ul -effective -hear -##ensity -evaluated -requir -information -liver -unders -proced -values -quan -deg -again -##tered -##az -prac -local -201 -phase -estim -polym -stress -##cs -electr -##tis -##ax -determined -region -##tions -val -standard -diff -ir -neur -##ire -bone -param -relationship -nec -whether -rates -01 -determine -acute -##ause -chronic -mass -body -free -cle -##arm -pain -sch -sus -diab -enhanc -water -occurr -either -equ -before -pul -life -##ving -intervention -bacter -will -001 -characteristic -pressure -conduc -##onic -gu -##omic -included -greater -ur -quality -##ian -same -various -##yr -##ivers -##icient -reduction -pathw -activation -structure -##uth -provide -injur -dim -##sp -thus -##viron -support -against -many -##plication -##ator -##ulations -manag -hel -ro -ratio -##ocytes -##orb -contrib -ren -shown -##lement -cir -multiple -vitro -nov -##ification -##iven -signal -##tain -exposure -##amin -symptoms -known -gly -current -##atively -direct -##operative -sep -medical -dom -##ules -purp -psych -prec -exc -nan -whereas -vi -negative -genetic -production -because -distrib -carb -##andom -##eters -##ission -ach -fail -flow -formation -21 -fac -##eu -sal -environ -##rome -detected -##amine -adj -random -compound -##ostic -##ellular -rele -40 -strong -##gram -##ensive -##tric -##ateral -##uted -recom -##ists -##ometr -possible -pub -transcrip -mort -##itivity -##ude -association -loss -part -nm -##up -##emic -pap -any -##cept -reduc -compon -##ae -##ct -ms -##icity -change -repres -##ocy -##ynamic -##nal -controls -membrane -success -order -##eration -confir -sex -involved -muc -weeks -cent -##otherap -six -performance -muscle -responses -avail -general -size -##atus -mechanisms -##tif -difference -oxid -##gn -commun -mortality -particular -lo -virus -##ablish -temper -##ient -properties -tox -hypoth -indep -five -establish -strateg -assessed -cycl -models -experi -lung -statis -consist -##ann -resid -##uk -synd -differenti -##elial -##istration -state -increasing -overall -##atures -behavior -##ision -carcin -purpose -identify -men -000 -##ochem -22 -sin -old -60 -##ward -novel -management -##olution -contin -cm -regul -##aw -heart -##aps -##ised -super -diet -poly -presented -being -analyzed -proble -amin -##inant -scre -rap -mac -##ologic -##eral -neuro -magn -ct -surgical -link -##estion -contrast -##eutic -##app -area -like -self -imaging -##opath -death -phot -foc -##ording -intra -frequency -viv -environment -ov -##acc -eth -##iciency -##iving -evaluate -volum -allow -##reg -##itor -active -dys -outcomes -##ie -circ -preval -##dr -##active -result -recent -therefore -##otherapy -parameters -severe -outcome -##eric -centr -peptid -stage -value -vivo -very -isolated -##elet -add -##ographic -tra -distribution -##ergy -short -indicate -technique -decrease -##ust -mit -enzyme -##oses -resistance -##odies -##etes -enc -##verage -##lor -detection -participants -morph -nit -index -sw -injury -experimental -renal -optim -##tinal -##orts -systems -male -hom -induc -23 -administration -block -site -cross -previously -##ides -inhibition -stimul -accur -sensitivity -vers -diseases -##icular -ve -##ogenesis -dev -characteristics -##ended -healthy -##osition -##ably -left -tested -syndrome -##tex -##amp -described -prepar -exhib -gas -assay -therapeutic -pregn -rapid -##ocial -bene -##ule -att -highly -##asis -considered -animals -##rh -##tegr -glucose -##ancy -##iat -##ull -##oplas -artic -fat -mediated -investigate -maxim -rh -benef -fil -correlation -incidence -##inc -followed -atten -available -containing -199 -##ours -##eding -family -##iological -##ns -lesions -cere -sequence -then -##tically -##de -status -##ode -saf -##ylation -tub -##ances -better -##ices -##ogenic -types -kin -sample -da -##ids -##ouse -##ters -improved -##ially -##of -prolif -##red -hiv -relative -##roscopy -##inary -caused -##olic -##opt -impact -28 -##ops -##ount -derived -##ner -individuals -##ocard -energy -breast -##ulf -##ually -##ands -surve -nor -required -##itation -her -effic -##ces -via -inflammatory -##otype -##verse -insulin -cause -pharm -te -release -solution -##els -integr -temperature -##iev -cardiac -field -##plied -hypert -##ygen -pc -##ey -##acellular -adh -secre -percent -net -amino -record -##terior -bel -ultr -35 -sites -ability -dist -poor -spect -mrna -##omp -expressed -influence -approxim -combination -assessment -produced -analyses -conducted -physical -prevalence -light -##arc -##eline -program -##therm -##ves -received -comparis -indicated -practi -##itu -useful -length -liter -know -focus -volume -##roc -##line -##oy -complications -##ectomy -them -given -elev -content -refer -##tial -skin -diabetes -##pr -tak -##tine -##monary -##ressive -##ling -##pec -monitor -abn -evaluation -sour -metast -visual -##ortion -##romb -##ibility -drugs -26 -ver -density -larg -features -##cence -adult -recover -average -independent -##ols -likely -even -correlated -heter -efficacy -scale -ill -valid -according -hc -immune -##ysis -synthesis -tumors -cat -continu -quantit -lig -dam -impair -mar -separ -compounds -diagnostic -acet -hr -##abil -##enz -injection -abnormal -applied -##ones -play -question -characterized -coh -often -apopt -orig -##uble -recogn -##izing -thir -ventr -good -demonstrate -initial -cop -off -##vent -oral -##resp -##order -##go -ic -cre -introduc -chall -vascular -##cep -##mit -social -recurr -activities -medic -oste -central -transcription -angi -transfer -fraction -##orders -##ators -##tanding -##oung -might -young -##vement -spectr -27 -##medi -least -paper -mix -45 -made -##mic -##orption -failure -tissues -glyc -pa -##otox -predict -pd -util -##ored -measures -##tle -cellular -##ising -carr -##ecting -underw -enhanced -represent -cs -##enced -32 -measurements -receptors -##rees -disorders -score -transplant -biological -nat -female -aden -areas -##thermore -furthermore -experiments -cri -procedure -##ledge -##yro -food -##ask -##ta -stimulation -asp -understanding -cur -epid -techniques -subsequ -literature -times -regulation -successful -knowledge -##ales -##art -90 -discussed -emerg -moder -strains -rna -diss -adul -80 -minim -regression -31 -observ -##ables -##work -combined -regions -degrees -36 -processes -##rob -position -repe -dynamic -##tib -##omal -approximately -##lic -tests -since -fin -ed -oxygen -few -underwent -baseline -arr -patterns -29 -pathway -##ocyte -median -would -culture -##uture -##bers -##igr -events -##rogen -step -affected -##ustr -bm -proposed -##olar -48 -artery -air -prior -thromb -nurs -progn -smok -occurred -pulmonary -resulted -ey -right -collected -confirmed -limited -antibodies -fluores -##ont -biom -coron -ess -##iotic -suggested -##rosp -educ -##opro -antibody -interactions -pcr -##omas -##eness -##erve -antigen -neurons -cogn -##da -##ier -##ochemical -achiev -epith -##ytic -section -ax -##ower -sensitive -methyl -electro -mid -key -33 -criter -constr -##lying -hydrox -agg -##iatric -##otor -horm -##till -suggests -##ru -complete -pharmac -labor -network -comparison -practice -versus -##ler -lack -auth -gam -refl -materials -serv -inhibited -carcinoma -##utr -suggesting -increases -alter -recomm -structures -endoth -application -established -setting -trials -70 -seen -guid -additional -exerc -chemical -##ke -place -hal -vacc -history -down -invas -isch -catal -altern -periph -cer -##ovascular -controlled -##urn -37 -malign -screening -coronary -relev -across -alc -iii -provides -chain -lay -accum -##10 -examination -strain -corr -transp -##ave -mir -duration -adults -scores -displ -electron -##ious -disorder -bir -action -prost -promo -##ploy -depend -mouse -espec -plat -iv -substr -especially -hand -secondary -##onding -future -##ining -##istered -##inetic -mel -training -capac -##ogene -seg -75 -##acts -corresp -selected -eight -proliferation -consum -##rosis -##terol -sil -defined -##nf -rad -##oscop -##oxid -##ethyl -##fusion -##emb -##last -agents -sum -##bral -sle -##ii -alcoh -##iratory -##pa -inhibitor -lat -colon -peak -##grad -detect -polymer -plac -34 -##utes -bar -##ondr -et -02 -kid -recently -help -toward -article -still -##set -ultras -##aff -##ture -desp -##me -basis -kidne -side -##ld -ref -go -ant -despite -##orbid -cours -components -##ying -damage -glut -lit -head -##estinal -lipid -leuk -molecules -chlor -describe -structural -##agon -suff -postoperative -clear -bil -phosphor -critical -improvement -apoptosis -##bry -prosp -mes -fold -hip -peripheral -diffic -##eph -cost -##ibr -graf -##tre -adap -criteria -distin -##ets -metabolic -##agen -fer -dop -kinase -ion -necess -chann -##ancre -real -##isting -##yroid -signaling -exercise -##ochondr -chromos -populations -calcium -don -peptide -bio -infected -provided -daily -##iting -depression -doc -alone -nerve -differentiation -##ectives -aged -task -strategies -##odes -load -prevent -trial -sem -##occ -great -affect -ra -series -functions -##ergic -does -therm -rang -na -od -##esth -interval -ampl -fet -activated -consistent -facil -point -situ -##entially -embry -concer -sn -##anol -essential -perce -##pre -testing -42 -eng -##iversity -retrosp -cou -frag -sod -##enting -elevated -moreover -##ints -##50 -acids -38 -employ -resulting -cognitive -importance -reduce -plant -community -hours -##ems -top -pancre -prote -absence -onset -mitochondr -cryst -glob -marked -pregnancy -simple -##ove -myocard -medium -rare -##ather -products -linear -implant -convention -degree -##otid -##umin -survey -##ptake -questionna -remains -achieved -log -##omer -transport -las -lines -access -reli -sci -particularly -migr -seven -infections -##bo -material -##oint -microm -natural -##ulate -capacity -transform -sulf -##oids -toler -lear -##function -metabolism -##ca -pig -cytok -##trans -##arding -##ument -diagnosed -##ophil -##ople -##olesterol -fract -power -##ef -##uter -extract -procedures -older -problems -##etry -##ayed -##pri -states -open -markers -##ivery -transl -transm -require -##ogl -fluid -dig -doses -appropri -##dom -quantitative -##arb -delivery -ear -bacterial -##oster -##12 -exposed -examine -label -underst -nutr -upon -##arr -recovery -wor -##ics -conventional -highest -magnetic -uptake -carbon -tog -stable -progression -##utaneous -biops -undergo -variables -reproduc -together -clus -pathways -##put -##ortic -experience -proportion -induction -##ched -cardiovascular -course -calculated -stro -rest -##thr -##imens -experiment -stem -statistically -sequences -administered -##icians -strength -gamma -maximum -cycle -##nas -irr -##ogenous -origin -##atitis -consequ -pair -##uff -##se -act -##isms -component -kidney -##itional -adverse -estimated -iss -##encing -respond -whole -host -nuclear -regarding -summ -people -alcohol -ge -##brid -cohort -polymorph -##icial -##eti -respiratory -resistant -internal -bro -##oz -antagon -pp -65 -wall -linked -##ession -carried -tempor -public -##gl -much -198 -prefer -lact -objectives -ventricular -include -safety -inflammation -##inity -43 -little -needed -03 -##idine -39 -mutations -domain -##anced -wid -inhibitors -variation -##titution -mak -##osa -##istry -mamm -sleep -##rolog -environmental -##ank -hypertension -special -severity -best -modified -##other -fas -terms -birth -##anning -dimension -systemic -laboratory -##rix -distinct -specificity -protoc -##hydro -exhibited -##pling -hb -animal -ped -##acr -along -motor -subsequent -##activity -bp -taken -##ered -var -ster -relatively -endothelial -stroke -##roscopic -ng -identification -pt -authors -const -recorded -problem -fix -home -pm -cholesterol -alg -##ceptib -monitoring -44 -countr -evolution -##esized -chemotherapy -##encies -appar -selective -conn -radiation -microb -indicating -##ense -cys -mt -##cler -having -55 -##oding -##bp -atp -susceptib -bal -randomized -##ulating -##uring -spati -his -dysfunction -star -##ea -##ococc -##sh -acqu -late -yield -amount -macroph -degrad -reson -spe -wide -incor -appropriate -hypothesis -##inated -limit -##atives -traum -restric -mc -##eff -relevant -confidence -liqu -##io -able -moderate -##que -thick -source -physiological -measurement -chromat -bacteria -bound -remov -recomb -##onomic -platelet -##dl -excl -##otypes -interventions -##utive -causes -microscopy -treatments -##mediate -designed -tom -composition -underlying -##ame -ht -hs -sd -##oral -arterial -double -corresponding -matrix -72 -intracellular -adoles -##fact -mainly -grade -efficiency -##kn -compare -accuracy -approaches -41 -hybrid -males -stre -implement -hepat -quantif -intensity -gran -occur -soci -ven -substan -alk -contex -attention -prevention -cp -continuous -means -##rin -contribute -##urb -search -cos -46 -##itude -cerebral -females -##ights -strategy -##oxy -##uded -antibiotic -diabetic -mental -trig -##fs -##ressing -education -wild -difficult -famil -myocardial -alternative -anterior -developing -later -necessary -repair -gastric -terminal -##hood -world -##ometry -selection -cho -reach -though -##oprotein -adjus -##uration -detail -half -secretion -reviewed -shif -arg -memory -ter -frequently -tnf -chin -national -##amide -##neum -extent -99 -images -susp -promot -##anes -##ending -sodium -predicted -shows -chol -opp -##yth -##ique -infants -##ams -04 -stability -stimulated -##ariate -shap -##enty -directly -cd4 -52 -students -regard -repl -##oglob -bur -forms -challeng -genome -intake -transition -##rine -color -imm -appears -tool -##activ -##ished -##ometric -##ges -##illary -gel -persist -third -toxicity -##ool -adren -full -epithelial -incorpor -inh -larger -leading -facilit -47 -lesion -err -interfer -pi -hepatic -instr -isolates -generation -revers -plasm -hormone -##orn -mil -she -rs -view -resolution -coli -prospective -diam -last -##cc -processing -##yg -multiv -tetr -deriv -led -##iety -iso -reports -bo -transplantation -##izes -viral -atr -ta -generated -must -##ady -applications -##orbidity -neurop -67 -cand -##11 -##ee -eu -##eal -involvement -liquid -americ -56 -questionnaire -inhibitory -datab -regulated -metal -aimed -vas -##ils -eryth -##tively -fluorescence -simult -##plicated -placebo -##icle -pneum -mri -allel -above -strongly -##orrh -enzymes -simultaneous -ever -complexes -accumulation -prepared -learning -reactions -54 -rather -##ench -##ectiveness -adhes -##etric -analog -remained -##ened -##ering -enter -variety -lys -alb -##enic -##obacter -##ings -ca2 -gast -context -64 -##ope -investigation -effectiveness -urinary -advanced -eas -unc -sperm -candid -smoking -categ -exist -##lu -anx -mutation -tb -conj -clinically -unique -comm -49 -resonance -spinal -satis -reconstr -rab -yet -statistical -oxidative -unit -myel -princ -reference -##tile -53 -plants -##uv -percentage -nerv -dm -haem -##ector -##venous -stages -nine -substrate -reas -incub -hp -cath -##ively -profil -malignant -vess -disp -##tially -##ecutive -soft -##ga -finally -rt -ss -optimal -grad -lateral -kinetic -##olip -pyr -cortex -mechanical -##ars -bond -resection -subun -ns -specif -##land -removal -emb -agent -partial -systematic -lum -read -##ots -laser -publish -layer -##artic -specimens -responsible -pb -longer -dimensional -maternal -ann -introduction -85 -profile -transmission -significance -extre -##man -supplement -##oxide -##ella -##arily -white -##la -paras -unkn -##opathy -services -iron -squ -fatty -unknown -cultures -efficient -recurrence -##orting -certain -ten -mild -interest -asth -collagen -call -spont -image -mob -cas -##lation -trad -fish -##irus -induce -##ired -dog -clos -spontaneous -produce -51 -contact -57 -pass -highl -now -way -mitochondrial -##iology -58 -##aces -retrospective -affinity -##otide -##ectal -proj -##tral -##estern -consumption -product -62 -##aine -auto -commonly -##acch -assays -spatial -neural -infer -advant -invasive -fetal -fluor -interpre -##onucle -published -weak -##inations -rot -ure -##zed -replac -trend -##ula -##onia -##hold -majority -antic -63 -##ident -neon -ray -aw -predom -interview -numer -##otrop -rou -center -fav -olig -currently -germ -obes -global -ble -cytotox -##tation -nature -dynamics -cerv -extracellular -##erative -##astic -##ients -##den -intern -medicine -fed -tri -mutant -regulatory -infusion -formed -nanop -temporal -obesity -event -##elling -operation -subst -irradi -divid -implications -##dominal -aer -hemat -constit -fit -sexual -basal -potentially -extern -prostate -differential -coeff -96 -##itr -slow -occurs -upper -vitamin -aggreg -anat -##pled -specifically -countries -force -##ieve -intestinal -asym -59 -pg -66 -reactive -##ohist -##for -melan -ste -##phal -constant -near -agre -dil -marker -##o2 -68 -attrib -receiving -##itable -rr -points -staining -chromatography -neurolog -biopsy -histological -##arin -thyroid -cu -confirm -##ify -relation -##itary -segment -norm -prolong -algor -micros -manner -##erved -reducing -algorith -profession -##lyc -recommend -solid -immunohist -western -sa -anesth -gender -ves -tend -electroph -den -##alian -##obic -posterior -successfully -##most -involving -generally -aims -attenu -expected -become -gastro -morbidity -twenty -##aged -fram -broad -completed -cultured -organic -base -toxic -agon -##ectin -degradation -acetyl -ech -located -73 -assist -molecule -past -pse -dietary -##itone -adip -sr -core -avoid -resour -another -frequent -##ocal -intravenous -##tration -cad -odds -mixed -barri -cervical -prognosis -tract -relationships -rib -tel -purified -##ocl -external -matched -anxiety -aortic -##ytes -##osin -radical -widely -##cers -cb -##less -members -##ching -benefit -thres -oh -ds -abnormalities -abdominal -humans -cf -##tility -dl -joint -orth -benz -##onin -pseud -theory -##eta -neuronal -tomography -ifn -##hyth -recru -conclude -0001 -undergoing -predictive -##uls -absorption -ads -igg -attem -disch -person -promoter -##ories -eye -living -78 -##ema -adjusted -divided -thor -wave -##osing -manif -assemb -##sis -##ared -map -prognostic -hydroxy -flex -rabb -##odynamic -leg -retin -slight -##gens -cc -conform -space -##ields -almost -pathogenesis -consecutive -determination -particles -fo -comparable -interf -##oring -altered -vary -##eletal -far -subc -61 -peptides -##ontal -associations -##elium -residues -contribution -##lasts -defin -graft -##otion -diameter -optical -##to -##enge -complement -##uts -remain -around -inser -##olytic -##53 -##arian -variable -oxide -gh -##ections -##adder -account -elder -rob -aspects -sed -alterations -76 -throughout -feed -du -classification -74 -chromosome -accurate -##he -##ls -##aryn -##onch -channel -soil -##alk -69 -accept -rev -superior -##aring -##crim -phosphorylation -nanopartic -##elines -respect -usually -document -impaired -pancreatic -##olved -ga -hypox -##ounts -protocol -antioxid -gal -ligand -exch -##ico -promising -recip -##ogenetic -hydrogen -elements -pk -##esia -heterogene -leads -concept -recombinant -##16 -##ologies -lymphocytes -behavioral -recognition -convers -thickness -98 -migration -ful -phenotype -challenge -nas -gest -86 -allows -sources -roles -##aves -spl -discover -myc -83 -discrim -every -vaccine -lap -wound -fung -emph -urine -plus -##osyn -82 -behaviour -77 -##osite -##tering -92 -##rot -ana -tumour -ratios -proxim -elderly -##immun -vel -asthma -making -units -84 -impairment -uv -guidelines -ow -ultrasound -deficiency -ty -plays -elim -school -##isc -hipp -som -##dle -##itud -88 -cortical -71 -obstr -fast -membranes -requires -rapidly -infarc -cord -occurrence -##ats -root -pac -phosphate -extraction -##ori -repeated -bronch -basic -adequ -polar -synthesized -cereb -por -sph -##ention -spectroscopy -##uary -profiles -appeared -mode -##osine -nf -pen -spectrum -numbers -macrophages -cis -biochemical -fragment -93 -##ermal -multivariate -##erex -gp -neut -pla -computed -epidem -##ectivity -device -##lo -epis -md -inn -abund -traditional -##reh -97 -decision -needs -##iation -##13 -polymerase -extrem -explore -##ocamp -##isation -marrow -occl -potent -hepatitis -until -enrol -##ophys -87 -heat -europ -pal -79 -cv -offer -pathological -##acchar -##apping -safe -signs -##imen -subt -compl -94 -phenomen -##flu -signals -manifest -##gramm -illness -variability -##icles -predominant -programs -##used -cav -nod -nucleus -##ral -##pati -descrip -physicians -##ring -tc -##inate -depart -employed -compreh -poll -observations -completely -assign -aud -nh -endoscop -biomark -nursing -final -excess -oxidation -eyes -indicates -trauma -89 -agreement -##oration -##ucid -media -experienced -deep -veloc -clinic -excell -probably -costs -donor -construc -##rophy -inst -##phen -recommended -##bal -polic -make -pediatric -##clerosis -##let -divers -bu -##truct -##aptic -hippocamp -technology -neph -amb -recurrent -europe -pet -orient -spectrom -fibrob -##rs -bmi -occup -genomic -preoperative -estimate -compr -h2 -arch -assum -##azole -middle -morphology -targets -understand -polyp -protection -perception -##ologous -##erexp -##osynth -minutes -##ompan -##orter -fully -organization -div -##ialysis -apparent -find -operative -hg -metastasis -exce -susceptibility -##allel -what -##ulatory -stabil -accompan -tolerance -presentation -protective -onc -dogs -families -dental -endogenous -conver -noted -fusion -pan -##ylated -##thritis -analysed -np -towards -feas -location -university -earl -##ortun -adhesion -enti -##eds -carri -turn -overexp -coord -##tract -emotion -morphological -minor -motion -blot -pretre -contained -defic -excellent -##olds -##20 -providing -##teen -##ework -##eling -close -preparation -##way -##osomal -jap -##by -computer -issues -neutral -lapa -##osal -fa -rich -81 -distal -randomly -neoplas -##ervation -cytoplas -domains -##ester -volun -transf -afric -actin -defects -##ember -ischemia -bleeding -##tp -##erence -##group -reconstruction -##oked -framework -sectional -movement -##ercul -progress -ros -##iaz -independ -prolonged -csf -unl -##ged -american -##osterone -nons -##oline -stimuli -elucid -ovarian -lc -haz -##ives -microgram -extensive -thym -##cents -intact -benefits -##ancies -ben -none -caro -prem -ethanol -developmental -existing -intermediate -camp -##iform -fund -ather -##ict -milk -distance -##ocar -##ata -neck -##itively -theore -programm -delayed -deliver -satisf -necrosis -venous -##eli -##bl -##18 -aug -word -ischemic -oil -enhance -fif -##fa -whose -schiz -band -database -seems -az -sat -adolescents -feeding -##apse -emergency -##ician -##olysis -injuries -parallel -below -##ophag -typical -cruc -logistic -smaller -##asia -##ibly -choice -suitable -immunos -address -except -soluble -immuno -practic -##atment -##thers -synthetic -infarction -##ione -element -possibility -##aline -##imer -nam -mn -cancers -medication -##14 -transient -fracture -nervous -routine -bilateral -put -##estions -au -lang -accel -har -nanoparticles -##itoneal -showing -labeled -targeted -exchange -##itals -regen -correct -##ochemistry -curve -la -princip -##ream -service -reliable -##organ -##thal -hazard -maximal -##eg -serious -mill -hund -diffusion -clon -hundred -added -propose -growing -##esh -preven -ker -attach -##phosph -varied -demographic -observation -sequencing -3d -colum -##ococcus -##ophage -working -bladder -lv -##anth -gradi -fm -##cles -tun -egf -pv -##lycer -regional -cytokines -congen -cox -venti -gain -igf -perme -##gans -500 -mp -##opic -coupled -precurs -uns -lps -nurses -##ump -channels -vag -concluded -cyp -others -##ra -genotype -radio -comparing -##oglobin -primarily -hf -##igen -autom -cortic -break -oct -##iral -##oxyl -91 -pf -smo -dele -uter -ions -achieve -##ocation -markedly -dc -parents -skill -##taining -##thern -psychological -relax -##tenance -interv -fibrosis -ak -ip -run -fall -ens -trigg -zn -institution -therapies -maintenance -ring -antigens -thre -##anial -minimal -neutroph -tgf -phyl -amounts -conjug -additionally -##actic -nucleotide -##rophic -tubercul -epile -recognized -##idal -commerc -subject -native -blind -dors -cut -ranged -shift -197 -##aria -jan -tryp -##care -##rich -##osomes -progressive -diversity -##jection -metastatic -##leuk -##usc -proximal -##angl -classified -japan -industr -##inking -##stream -knee -bran -angle -infil -subgroup -injected -##ycin -retinal -##urg -tit -##ivalent -##15 -benign -periods -mh -##rocytes -thermal -port -maintained -bov -uncle -smooth -hospitals -t2 -antagonist -international -asc -electronic -seiz -3h -shape -antioxidant -solutions -eti -deficient -irradiation -musc -node -threshold -##genic -complication -##cal -subtyp -partially -invers -##tingu -communication -fiber -##itus -longitud -replacement -behaviors -contain -##urine -substantial -earlier -##orage -caps -failed -probe -never -##ament -occurring -monocl -algorithm -catheter -##erent -##ested -variations -unclear -kda -##ili -localized -spectra -##entical -##cher -reverse -##urys -scanning -describes -microg -vac -delta -implementation -perceived -tools -closely -exhibit -velocity -dex -subunit -##ophren -ru -nitrogen -tl -##atib -extended -sufficient -latter -subsequently -devices -cn -intervals -depth -shock -arm -##ias -hla -##ogens -united -enrich -distingu -metabolites -gc -neonatal -regular -enrolled -qualit -hydroly -interesting -schizophren -seem -influenced -prediction -hydroph -chinese -##letion -##apl -analyze -sensory -separation -tm -independently -##we -fab -##hythm -localization -hd -pathology -##mitted -adjust -ig -spin -identical -focused -estimates -beg -hemorrh -disturb -excre -##argeting -counter -##itiz -rise -opportun -healthcare -matter -amyl -surge -##17 -atrial -##ophageal -obtain -##tics -bul -represents -acquired -entire -positively -intensive -##avage -##aries -lam -vein -##uit -##ches -dominant -##rous -##ises -marg -##ontin -clearly -targeting -skeletal -emp -decreases -##try -p2 -incubation -bis -detailed -suppression -extracted -persons -height -invasion -derivatives -inactiv -comprehensive -cam -interleuk -##rium -thi -##yle -degen -largely -verte -##eck -hab -initiation -discharge -illustr -cytos -cover -perfusion -magnitude -sampling -extracts -encoding -contraction -##tious -fibers -direction -monoclonal -sten -##back -##ozyg -atom -pros -competi -enhancement -records -correlations -improving -january -##intestinal -derm -##elf -promote -seas -risks -psychiatric -fixed -presenting -childhood -crystall -aa -department -crystal -pneumonia -gastrointestinal -error -##19 -##irect -slightly -sinus -##ocrine -thirty -spectrometry -artif -moth -variants -##eptide -gab -thorac -challenges -pregnant -equal -measuring -emission -suppress -egg -beneficial -south -##east -hcv -fractures -done -allowed -interleukin -storage -cytokine -gland -circum -##anine -##ellar -300 -dependence -inclusion -tyros -ane -reason -characterization -producing -dispers -neurological -##aft -crucial -plan -##ohy -##ements -silic -face -refr -##ways -06 -##oside -##read -##opa -perc -numerous -goal -effectively -noise -surfaces -reh -##olec -longitudinal -expressing -adequate -workers -ethyl -opi -predictors -balance -healing -##factory -glutam -contrac -understood -explain -cluster -##oman -##ocarcin -vessels -nk -##elled -##atid -diverse -indirect -presents -250 -##queous -modification -muscles -limb -chick -tex -example -remaining -##athy -meta -parent -newly -possess -##beta -py -highlight -date -##oa -ldl -died -stimulus -allele -lob -##ike -lipoprotein -wors -##isa -ranging -exer -airway -vegf -substit -supported -theoretical -nitr -practices -aneurys -questions -aqueous -arth -tail -wr -##rotein -p53 -##acer -ast -neither -nmr -rhe -##rec -histopath -reper -dos -pel -strept -tuberculosis -kill -bc -##enchym -elic -summar -##otomy -metastases -hex -##itted -max -accompanied -coefficient -validated -aspir -##bc -obese -##ichia -albumin -networks -saline -decline -displayed -sustained -bovine -transduc -whom -##acin -hour -##icin -##isions -##ovirus -recommendations -kinetics -##rog -cd8 -suppressed -organisms -gold -##uting -leukemia -dy -corne -gm -##ensions -##alpha -admission -##a1 -transcriptional -ple -##oin -determining -granul -micrograms -##certain -polymorphism -##ades -immunore -ts -simultaneously -economic -##kin -amplitude -##ires -vector -##transfer -pulse -introduced -once -robust -persistent -##rief -##atase -residual -transcript -explored -dopamine -organs -##ologists -assigned -contains -##dehy -μm -aging -discussion -##part -##dm -particle -systolic -taking -spread -teach -##lim -##ners -mmp -##itory -controlling -consequences -mothers -##ifications -atheros -younger -arteries -##ipl -swit -##patient -considerable -##anding -conversion -coupling -##cribed -phenyl -consisted -frequencies -termin -lymphoma -acceler -mutants -mv -diast -engine -##ows -valve -##sin -##ycl -##itant -congenital -##ilateral -output -appearance -mis -thought -tes -##operatively -##okinetic -substrates -integrated -affecting -impl -investigations -directed -chest -120 -antimic -defect -salt -capable -green -gir -hm -##illin -vul -poorly -antimicrob -personal -fill -colorectal -ket -##rose -possibly -speed -##ulum -ni -langu -resources -##ina -##be -pool -delay -limitations -##ectious -##ensin -##fr -volunte -##ton -##enes -return -axis -facilitate -mammalian -replication -thin -ethn -functioning -mand -depending -reflect -favor -##oglyc -documented -##eless -##omycin -##udes -##otypic -attemp -##urally -count -clar -dehydro -kapp -pretreatment -reproductive -nad -emotional -125 -wt -##ush -called -##np -##mediately -##xt -2000 -##ocardi -volt -clearance -persp -demonstrates -pharmacological -antibiotics -elisa -black -##ban -##osens -##oe -microbial -##uvant -##ecal -lar -graph -rehabil -electrical -circulating -escher -##enia -immediately -mineral -dual -##uctive -autoimmun -fibr -t1 -bow -escherichia -bat -amp -separate -fresh -lh -ib -influenz -agonist -##wide -sections -move -##oplast -pack -recording -cod -07 -rod -##bor -##oved -cd3 -##bar -rein -radiotherapy -hair -150 -affects -explained -efforts -characterize -nasal -retention -##illus -##unding -##oic -rp -validity -reliability -original -gangl -substance -varying -##plasia -sarc -##iciently -hypoxia -expans -infectious -placed -transformation -ck -m2 -phospholip -inoc -fluorescent -dend -fractions -creatin -cul -lowest -satisfaction -##get -buff -hpv -initially -##otropic -adsorption -staff -settings -distr -conserved -modulation -implantation -carotid -methodology -##exp -brief -##oidal -charge -simulations -proph -##upp -insuff -##iveness -removed -fu -pairs -##ocomp -induces -attit -ground -adop -seed -react -##anged -plate -##izations -feature -##cephal -gradient -##avy -probability -simulation -reactivity -hcc -schizophrenia -scat -##oblast -glomer -concern -gon -##ads -availability -##gs -serve -08 -repeat -##avel -cytoch -mim -gi -qualitative -retro -##oresis -##treated -reached -identifying -ulcer -arthritis -estrogen -infr -peroxid -puls -burden -junction -upreg -cyst -##osity -##acet -hybridization -coc -tyrosine -subjected -##ophosph -##axis -##itin -murine -minimum -vaccination -occlusion -##ze -mature -knock -duct -disability -jun -dp -cyclic -##b1 -mixture -words -plastic -glycop -restricted -suic -yeast -receive -##uria -cdna -##electr -comput -coding -echocardi -centers -casp -concerning -fragments -##aphyl -adherence -##ocytic -equivalent -planning -deaths -language -decl -rank -##cap -##insic -unf -proved -##aper -spr -400 -lt -zinc -nearly -rehabilitation -eb -compart -lep -##ostasis -column -##hg -advances -changed -actions -temperatures -regulate -users -hearing -heavy -isolation -tax -barrier -##acl -##atically -advantages -similarly -sf -deletion -gap -biomarkers -attenuated -##acent -pathogen -includes -##entia -##omet -##sa -2010 -display -physician -policy -urban -woman -pelv -##opo -european -assessing -practical -steps -cytotoxic -concom -pure -embryos -admitted -catalytic -belong -fibrin -ce -summary -involves -tp -##pass -##ware -antimicrobial -skills -##usive -##usal -##accharide -parameter -biosynth -dip -rating -gaba -thereby -methylation -collection -##forms -ligands -walk -express -##25 -epithelium -stay -constructed -##dehyde -phenomenon -issue -cycles -##illance -stenosis -viability -tube -modeling -rar -manip -draw -bile -pathophys -composite -scientif -phases -scan -fibroblasts -cartil -##co -infant -concomitant -prevented -##err -##wh -##isp -prophyl -released -ages -leaf -##anded -sensor -fixation -medial -bias -##olin -treating -accoun -endoscopic -discre -nodes -##wise -##oscopy -trim -standardized -sera -variance -composed -##itol -monitored -austr -evoked -depressive -untreated -femoral -symptomatic -dementia -mb -shorter -absol -surveillance -##nt -bin -##time -pharmacokinetic -antis -ba -##ilities -implicated -##ohydr -surviv -p3 -transpor -##omers -##40 -sulfate -advantage -stere -##issions -dry -##onas -reporting -immediate -saliv -fields -##ash -supr -deposition -##aemia -consisting -segments -retrospectively -overexpression -sple -glycos -polymorphisms -variant -trunc -mi -distributed -forming -nico -si -melanoma -gli -##ache -analges -##path -##acial -##transferase -viruses -dissoci -attributed -bowel -instrument -db -blocked -rheum -##iologic -suspected -##inning -##idin -foot -inters -cg -iod -improvements -μg -give -glutath -remark -##iance -dorsal -china -utilization -generate -room -driven -north -spectral -##chron -deform -flav -differentiated -sti -metall -pitu -gover -##teine -comparative -sets -zone -excretion -experiences -sir -##oon -dimin -##ril -##abilities -lens -tasks -adjacent -expansion -assisted -aggregation -own -nitric -aid -assembly -depends -mell -volunteers -ing -nt -locus -rabbit -asymm -overl -applic -##ensus -pigs -stat -progen -interface -dram -multid -scientific -ld -relevance -##23 -##mentation -##pler -increasingly -sedim -##otr -dehydrogen -fol -negatively -libr -##ogram -undert -##pan -predominantly -influenza -##omical -##angi -rout -##geal -limits -rural -anesthesia -adenocarcin -09 -##ilib -restriction -separated -glutathione -apoptotic -referred -became -breath -socio -##term -pu -parad -operating -traits -glutamate -contamin -next -clusters -##ilibrium -hard -take -elevation -embryonic -##plc -##des -tg -nc -biof -susceptible -immob -sv -nuclei -extremely -donors -forty -drinking -##ury -tur -compet -detectable -##mp -candidate -commercial -pic -pituitary -##ulus -immunity -evaluating -##lets -synaptic -interpretation -insertion -laparoscopic -bor -mmol -aids -autoimmune -miss -check -errors -steroid -substitution -##ks -tumours -cx -##eting -2d -cum -cytotoxicity -prosth -extension -wa -quant -recruited -waste -electrophoresis -cataly -loading -potentials -facial -recipients -prelim -osc -curves -##ips -valuable -diastolic -acetate -bypass -##acting -nr -putative -##gar -relaxation -recovered -easily -cytochrome -##water -##57 -ub -sho -anomal -absolute -pil -episodes -expect -##aryngeal -multic -mucosa -analytical -bind -proton -bd -angiot -cry -professional -threat -tob -counts -obstruction -gram -african -decreasing -xen -pathogens -vessel -twice -glycoprotein -calculations -transgenic -abuse -subcutaneous -##agulation -lymphocyte -adrenal -house -veh -hdl -always -continued -##ging -feedback -##ocytosis -grown -##hs -articles -##ruption -##atif -causing -uncertain -maintain -mostly -##hydr -ect -encoun -##urance -mellitus -hence -##bre -##era -##oints -bac -regulating -synthase -biology -angiotensin -reasons -2009 -preparations -cavity -cartilage -spleen -##acco -december -chains -pron -emphas -acad -adaptation -define -film -##veolar -##yte -live -incorporation -##tor -##aken -integration -art -ju -interviews -co2 -##ensis -fing -mitochondria -##abs -typically -##ptomatic -deficits -nutrition -fewer -clinicians -distinguish -correspond -reduces -penetr -cla -estr -considering -heterogeneity -greatly -utility -regimen -##isition -##rup -solvent -track -phylogenetic -1000 -2012 -angiography -gs -layers -reperfusion -##onate -categories -emerging -##fp -##igible -preliminary -cond -ester -quin -existence -indices -##ayer -predictor -scal -aure -##opol -reviews -residents -##atig -fever -excit -##ifying -adjustment -smokers -hydr -tobacco -copper -hypothal -adjuvant -cold -acquisition -##60 -decomp -##ematic -correction -blue -##igm -viol -communities -sept -pert -##oxin -epileps -list -strom -retic -##1a -oscill -##mental -fatig -buil -veget -##gi -compens -utilized -quantum -supplementation -extra -precursor -bearing -injections -##etal -orb -fabric -interferon -##dp -##alities -adenosine -##phenyl -maturation -cytoplasmic -sup -definition -mer -immunosupp -align -caspase -cns -##gal -plasmid -displac -software -items -contents -identi -hypothesized -voltage -##assium -clim -unus -focal -##fe -2011 -pall -##draw -cleavage -nonc -inferior -rc -outs -##vascular -project -orientation -##amous -e2 -withdraw -dehydrogenase -##amental -##ley -serot -toxin -carbohydr -potassium -true -asymptomatic -classical -arom -mening -loci -intrinsic -motiv -beam -conduct -##ophen -aureus -nutritional -ventilation -insight -ace -##adiol -jud -eosin -immunohistochemistry -##omat -stratif -##yc -influences -park -distress -gall -tib -yielded -teeth -hsp -loop -##ready -dialysis -##mark -##atidyl -already -config -perf -##weight -chloride -##amination -##ears -aggressive -genotypes -mucosal -requirements -unch -requiring -interc -mach -##70 -##uments -##dna -yo -##chem -regeneration -pyl -cutaneous -auditory -mobility -array -##min -##idase -bcl -exogenous -##ister -pump -fc -scales -##ilization -ln -classes -##22 -##oden -adaptive -dihydro -aort -predicting -input -amyloid -motif -validation -coch -2008 -amph -2013 -mapping -##ux -##ylate -differed -##enchymal -##choline -transfusion -bey -ah -staphyl -options -epilepsy -estimation -##uch -enzymatic -##phrine -evident -beyond -precip -gave -participation -relapse -neurot -pent -##ipp -steady -educational -carcinomas -trends -mu -rabbits -sched -dh -brom -sul -##ette -##rovers -inner -hemorrhage -##oyl -herein -bip -insights -arab -phyt -##30 -protocols -##olecular -lipids -inhal -##ief -##ulative -##eses -examinations -reversed -##iness -##zyme -cirrh -barriers -substances -abstract -scaff -eg -##oplasty -movements -remission -undertaken -fundamental -hz -permeability -secret -interact -002 -##max -remod -corneal -careful -lamin -rig -##fluores -salmon -worldwide -formal -mmhg -society -hemoglobin -pn -argin -vide -##ospor -parts -discovery -intram -trop -immunoglob -shr -seek -controvers -immunohistochemical -thoracic -medications -##areness -superf -macrophage -stent -overc -fatigue -correlate -hemodynamic -phosphatase -ocular -embol -##opathic -precise -##azol -##iction -##urated -positions -##edic -##rounding -electrode -fals -2014 -lactate -regardless -2005 -uterine -vesicles -careg -cation -inducing -heterogeneous -acting -principal -author -starting -glass -anth -practition -determin -ammon -delivered -implants -enab -frontal -##adily -epidemiological -deb -heparin -member -uniform -aberr -indicators -dt -simulated -mor -epit -awareness -cham -thal -allergic -thrombosis -war -offers -films -##ritic -comparisons -##umab -consensus -##ought -##emat -micr -pathogenic -inhibits -##point -bodies -surrounding -nar -squamous -biopsies -##mod -##aints -empir -prep -subunits -truncated -schem -cultiv -##ensities -2007 -science -##arch -rational -lacking -hplc -##ime -homeostasis -concurr -peg -##ibration -p4 -parental -fetus -environments -##tead -osteop -hapl -feasibility -itself -pelvic -rupt -substantially -traumatic -gestation -residue -spine -professionals -mast -annual -labeling -similarity -creatinine -providers -2006 -technical -leaves -##nel -amplification -subjective -##avi -attack -cardiomy -hbv -etiology -##elihood -allowing -govern -testosterone -detecting -promoting -polys -##arrh -consistently -manifestations -##heimer -intrac -##tisol -combinations -abundance -alz -##aden -c3 -attr -sought -virt -opioid -sizes -mono -##ota -weighted -quad -retri -##aceu -ram -operated -capillary -implemented -disapp -malaria -lp -modifications -##adian -##azine -hippocampus -##anti -dimethyl -##ionic -fasting -clones -##inson -instead -sepsis -uncom -gestational -concerns -artificial -placement -##mc -esophageal -##ounced -o2 -##alc -##onuclear -initiated -team -fd -stom -resting -proven -supporting -##graph -photo -guided -##eded -hydroxyl -hypertensive -coverage -transcripts -version -alzheimer -005 -identity -ones -2015 -##epine -##oxic -gut -income -dimensions -occupational -##osome -##k1 -nanos -outer -prevalent -percutaneous -depletion -likelihood -translation -anaesth -##ectors -land -preventing -hydrolysis -interestingly -circulation -rb -born -guide -chond -promin -peritoneal -compression -compliance -##otides -diffuse -##apped -fifty -eeg -##lin -option -challenging -colle -sympath -##ulties -supports -symm -equilibrium -idi -biofil -adenocarcinoma -accom -namely -contributes -##ypt -##icients -seizures -ubiqu -ras -digital -attitudes -antagonists -abundant -coefficients -##down -cann -leak -respondents -##opausal -virul -microscopic -researchers -insp -##oming -consists -created -aneurysm -raised -om -ot -extens -##itect -pronounced -deviation -atyp -ho -##ietic -lost -evolutionary -bot -quantification -recruit -unusual -##uron -hippocampal -##bi -nonin -arrest -false -##uity -##essions -girls -##otoxin -structured -yields -pren -speech -beginning -##ophilic -leuc -ips -ent -databases -tt -##load -lumbar -anatomical -ile -limiting -bic -biosynthesis -##uous -absent -collab -inactivation -cig -atpase -##ocys -triglycer -isot -ablation -eps -indications -doppler -resolved -vaginal -##ued -instit -##entricular -difficulties -tooth -reading -abol -##eps -parasite -uses -whe -fungal -plaque -rheumat -##astom -##ry -exists -gd -##lip -##ameth -japanese -ball -##nia -standards -participated -##ba -auc -phenotypes -constitu -homogene -survivors -ald -exact -efflu -atherosclerosis -trip -°c -##mented -##orous -##ilic -##90 -subset -rv -##going -fore -osm -changing -##titis -complicated -malignancy -quantified -cortisol -wk -mess -##27 -##ibular -perspective -brid -##ana -postn -tlr -rarely -unchanged -##80 -screened -complexity -chi -gn -galact -unilateral -stimulating -leth -flux -##lastic -##assay -fist -wash -egfr -##pread -cumulative -coated -##ss -interference -bed -ath -##iasis -northern -li -alleles -grafts -##ozygous -wides -vehicle -potenti -##onography -selectivity -habit -widespread -ank -ways -monol -##uscular -##rocyte -vic -diarrh -refract -vertebr -start -intes -portion -german -absorb -dich -favorable -##ilized -situation -decisions -mother -hormones -representative -elimination -entry -ask -##omonas -##iae -hospitalization -attachment -##inea -visits -resource -finger -feasible -##anted -neigh -decades -2004 -optimized -carriers -kb -arginine -##acranial -synerg -actual -hydrophobic -enriched -##opoietic -dosage -inhibiting -perman -discontin -##ni -princi -conserv -##tures -conflic -academic -##ocortic -parkinson -sound -retriev -faster -psychos -##enter -fine -crp -experimentally -degeneration -sud -pylori -semi -technologies -ongoing -discrimination -questionnaires -mann -##urity -flap -construct -makes -##uctase -platelets -obst -narrow -probes -disruption -hors -protease -excision -represented -principles -maintaining -##ded -copd -##unt -ultim -spher -volumes -##uctu -involve -spermat -dn -diets -##ifer -alkaline -dro -tension -phosphatidyl -preterm -##ws -adolescent -fluctu -##ulose -infiltration -greatest -infrared -##eptive -excluded -forces -elicited -vital -##ocular -cystic -2016 -incubated -embryo -scans -onto -exception -##odil -cigar -see -teaching -##elin -##osyl -partners -trained -##ote -depos -prospectively -##ardi -edema -computational -##dep -timing -locations -consequently -##p1 -observational -##mt -##ascul -vulner -drain -phag -distributions -##esterone -emerged -##chemical -vaccines -eating -adipose -efficiently -plane -pts -histologic -accurately -cytometry -equation -sham -##fold -lith -solub -consequence -rotation -##oxygen -##opes -prescrip -##vents -progesterone -attempt -precision -diagnoses -closure -serotonin -##romy -optic -microscop -translocation -univers -akt -determinants -contamination -hor -oxidase -reversible -categor -##agul -phenotypic -##vas -rejection -atm -intraoperative -##holds -cyan -gonad -performing -responsive -preferred -indicator -cloned -larv -##28 -gy -visible -alveolar -elucidate -law -metabolite -subtypes -agonists -##atile -modern -generalized -carrier -warr -##ometer -microc -explan -linkage -##yster -splic -supply -downreg -dissection -ly -genus -rf -colony -assessments -obvious -diffr -anastom -library -synchron -##bb -combining -readily -text -closed -demonstrating -effort -postm -##eed -##ometrial -##oded -reductions -##zing -morphine -##rene -needle -dark -ionic -##itic -##amycin -##ders -bulk -##21 -superoxide -description -##26 -acceptable -regimens -confer -aver -##aster -outside -million -nin -stiff -mom -##tice -carboxyl -hol -sequential -reflex -tab -sclerosis -km -consideration -representing -mos -former -panel -##rotic -ans -accep -encour -hyd -##ah -##ani -##rd -antit -##itting -uk -just -selectively -studying -##enol -chromosomal -trach -architect -quantify -complementary -postnatal -inher -##iph -modes -occas -outpatient -vision -downstream -incorporated -chromosomes -##24 -##ocr -##ocellular -follows -atypical -aqu -estradiol -fibrill -mhc -##agland -##inf -##matic -deter -reveals -surgeons -##itization -##born -##avelength -dd -2003 -##quin -couns -braz -improves -ensure -##2a -filtration -expressions -##ograft -immunoglobulin -addressed -##ieved -disappear -ls -ze -square -##tes -perfor -s1 -corrected -diverg -##related -dox -race -dye -suffering -##day -neutrophils -##°c -prominent -largest -shared -encephal -boys -refractory -##erin -##lam -tag -immunological -conformation -epidermal -route -sb -ger -##mia -##ato -glycer -datas -stronger -##odef -scattering -suicide -sessions -stop -##ret -bonds -ke -##rown -intestine -reinfor -##infl -enable -sediment -priv -ige -sheep -powerful -subcl -examples -embed -##estive -premature -angiogenesis -recruitment -##carb -##atible -homologous -stained -nos -##ison -##iar -##theless -produces -nevertheless -wet -extend -negl -##ocon -withdrawal -respective -mycobacter -enhancing -correlates -bab -repeti -guinea -##tors -2002 -##break -drop -immunofluores -##osyst -rectal -pes -considerably -operations -somatic -microarr -carbohydrate -anemia -##enari -offsp -youth -proliferative -endocrine -immunodef -##au -modulate -antico -tolerated -resemb -003 -organism -intersti -passive -##cine -secreted -nurse -echocardiography -sou -##uly -##ophyl -##vel -object -intracranial -integrity -corticoster -##anned -prostagland -##omorph -harv -##29 -dendritic -portal -qt -histopathological -##amus -dic -proportional -presum -pharmaceu -uc -tor -dw -encoded -tn -gla -vertical -oa -deterior -obstructive -shaped -##electric -formulation -viable -screen -comorbid -##iary -um -differentially -##omotor -##ticular -##ou -diminished -motility -nps -tf -##aving -blast -wind -postoperatively -radiograph -consult -dyst -##oor -commit -easy -noc -preference -wavelength -offspring -##ectom -outbreak -kd -##opl -transduction -fatal -fertil -southern -##ophila -engineering -nmol -##romes -blockade -sixty -##ivariate -dissociation -kappa -##65 -incom -neutrophil -instability -##platin -enl -exhibits -bes -##nea -june -cov -bands -cirrhosis -remodeling -activator -##ided -##adic -chromatin -terti -2001 -situations -reag -##arp -##iplinary -##iol -lungs -##ada -##embr -electroc -accounted -infiltr -platform -rice -sympathetic -buffer -aorta -brazil -depr -##isph -responded -ham -##angement -pkc -worse -scenari -competitive -preventive -emphasis -meaning -pathologic -walking -hepatocytes -transformed -neuros -##otes -permanent -ars -tree -nutrient -cocaine -derivative -##ococcal -##encephal -transfected -##ephal -##ordance -##omeric -tertiary -lac -##orbent -promoted -osteopor -hypers -##ulates -radiological -inos -##xy -dissem -country -histor -inside -managed -centre -roots -##inflamm -protect -shell -cisplatin -width -overview -obl -comprom -doub -##avirus -deficit -mirnas -formula -hl -igm -psychosocial -spor -shear -personality -qol -surfact -##empor -keratin -monocytes -trp -destr -duoden -canal -reductase -##usively -incomplet -automated -participate -prenatal -comprised -elastic -anaer -modalities -apparently -coordination -online -nonline -programme -propag -##ora -gastr -sca -##mg -focusing -glands -acidic -##flow -implanted -tach -chor -overweight -malform -acts -##2o -ft -##pro -metals -season -##alling -##ograms -stimulate -##arged -##yd -excitation -##asone -##acerb -##atum -somat -contributed -harm -exacerb -##uits -subgroups -##times -##rane -atoms -border -##atibility -##off -glomerular -larvae -visit -web -leptin -wat -topical -aromatic -numerical -incident -sen -depressed -arrhythm -##phthal -discovered -##plicit -satisfactory -##ert -mutagen -normally -excessive -##actin -endometrial -fgf -blocking -redox -applying -ecological -dermat -genetically -intrav -cytoplasm -decade -##acycl -diffraction -td -merc -lr -##oxacin -immunization -mesenchymal -pressures -chamber -pathophysiology -nonlinear -minute -manu -abolished -pow -pilot -biliary -horiz -##osus -intraper -pregnancies -lobe -vacu -adapted -item -mobile -urea -yr -##athyroid -gsh -##clc -##ubation -retained -##eptides -replic -##ulmonary -empirical -nd -accepted -sea -atrophy -##inter -math -carrying -patch -quick -importantly -veter -industry -ventral -fruit -span -idiopathic -manufact -innov -##ancer -isolate -evid -rearr -180 -encountered -lifest -cysteine -pancreas -highlights -paired -vascul -tight -nicotine -trich -dyn -candidates -recombination -subtype -secretory -##anal -overcome -##ai -endomet -burn -##affected -templ -interstitial -##ica -practitioners -##phosphate -oocytes -atomic -capture -contributing -potency -assumption -##ortions -biomass -prolifer -critically -optimization -##ropl -endothelium -surveys -staphylococcus -ideal -industrial -##adequ -loaded -##orec -hos -##oustic -binds -lev -inadequ -##asic -apical -##agnetic -hams -started -1990 -##rod -##ardia -cultural -##akes -radiographic -110 -stromal -rheumatoid -##oli -##iration -drainage -iga -tended -alt -stret -stomach -elong -scar -activating -unaffected -mitral -##uncture -enables -##roz -responsiveness -##ero -nano -create -##odon -##cn -pep -serial -difficulty -histamine -microt -let -contral -psa -differentiate -hypertrophy -catech -##inating -manual -lute -category -##ians -adjusting -##osan -inducible -##osures -euk -sib -mood -##ools -glauc -##ocin -spp -perceptions -concurrent -nerves -salmonella -adduc -sugar -fear -trait -regulates -attempts -radial -statistics -alteration -hu -clarify -video -immunodeficiency -processed -snps -##vasive -retard -##oar -isoforms -microbi -modulated -protected -ethnic -eukary -##vis -student -conformational -predisp -coating -##rox -t3 -unexp -biomarker -ki -##rt -##nat -amel -pge -latency -demand -sds -utilizing -dimer -perm -##rile -tendon -traff -posts -continue -immunoreactivity -march -foods -lin -malignancies -##isciplinary -filter -mediate -substituted -icu -newborn -contralateral -predictions -##ometh -cattle -weekly -perturb -exon -##oderm -##opyr -algorithms -acuity -seizure -aerobic -sensing -hemisph -synergistic -hypot -##agic -##avail -rupture -adp -pin -##ci -rrna -cats -##plex -city -##cepts -stiffness -blocks -600 -axial -bipolar -intell -quar -opposite -##lings -histology -believed -##arcoma -microorgan -neuron -##fort -eligible -displacement -learn -asked -##entis -##iable -##ropathy -night -rhythm -antidep -##tructure -registered -digestion -scheme -turnover -unlike -beh -unrelated -ppm -prophylaxis -bf -mab -fourth -guidance -##cious -systematically -virulence -constants -##hd -disturbances -anthrop -glucocortic -caud -##gia -bmd -sun -tau -wheat -##49 -fs -exclusively -optimum -signalling -##orp -austral -neurom -tand -mononuclear -ging -ti -exposures -soy -##asty -medull -xyl -mum -casc -atmosph -coagulation -##lands -prescribed -facilities -tap -sap -compartment -chemistry -activate -1999 -cows -gradually -fasc -##43 -descriptive -jour -explo -tendency -immunofluorescence -flexible -esophag -peric -##elves -##nis -je -microgl -citr -adrenergic -mammary -hematopoietic -##elve -##ees -##ulder -rn -##opsis -twelve -reflected -fitness -androgen -transporter -##mitt -declined -static -##sych -kore -fig -regulator -goals -##economic -bay -peroxidase -climate -disrup -examining -salivary -ef -##umor -##pecific -##56 -tandem -antidepress -tetra -hepatocellular -##osen -uncommon -covered -neuropsych -opening -##eres -##lem -severely -oes -opportunities -expanded -bt -accelerated -impacts -sulph -porcine -africa -equally -antiv -mixtures -shoulder -embedded -answ -driving -fibrillation -##bil -headache -##iking -##pes -##iod -serine -conv -sudden -autophag -discusses -claim -helpful -distinc -autologous -ecg -irre -##odialysis -eosinophil -division -inverse -##apt -##enopausal -controversial -##ynaptic -marine -lifestyle -upd -returned -registration -##aments -##ptic -lineage -##inosa -sucrose -##chlor -##uation -##embrane -supplemented -##ception -##glyc -dros -electric -concepts -frame -saturation -##aint -syndromes -rated -dramatically -institute -ventricle -too -partly -chosen -superficial -##amethasone -genomes -adhd -july -##accharides -##ques -indication -##ariable -building -share -cvd -neurode -configuration -##ectable -circular -ophthal -##opolys -##imetry -##erge -neurodegen -gyn -vectors -birds -##idium -bif -ventil -financ -offered -bud -##gone -shifts -tac -property -restored -tip -c57 -conven -homology -usefulness -investigating -usual -##tidine -transferred -contributions -surpr -quite -196 -instruments -electrodes -chloro -##aste -lethal -modality -paradigm -epigen -aml -acr -suppressor -mov -alkal -construction -deoxy -plasticity -pit -swelling -attached -null -irradiated -tubular -chemok -undergone -distant -genotyp -##inflammatory -becomes -thems -pref -electrophys -box -##icking -themselves -familial -lifetime -##37 -opportunity -##vs -##ocor -##astatin -innate -polypeptide -enanti -mask -##aceae -histone -psori -conduction -subm -##estim -##ocated -weights -sometimes -perin -muscular -requirement -conservative -analogue -raw -fell -ide -leukocyte -##ala -1998 -mda -session -vin -004 -strand -besides -silica -crc -brach -zero -vo -##omatous -round -physiology -rhin -effector -##rim -placental -papill -visc -purification -##omb -strict -eggs -lm -##uate -phe -perioperative -hyperplasia -socioeconomic -college -epidemic -clamp -bol -pca -pooled -forward -##ping -linking -a1 -selen -recordings -sense -##rought -connected -extensively -amplified -drosophila -sick -injured -##aemic -hier -electrochemical -lowering -ia -urg -lup -stored -rub -registry -contaminated -##otoxicity -april -anatomy -homogeneous -hot -insufficient -##grade -##ploid -retina -competition -##ostatic -anion -##3a -##tives -neonates -mf -referral -orally -nodules -unlab -reporter -tens -iq -##viral -fibroblast -anaerobic -kidneys -eh -remarkable -ligament -biofilm -##idity -aspect -lag -##esium -preserved -a2 -deal -cure -hierarch -harb -breathing -hormonal -microtub -gives -son -margin -peaks -aerug -precursors -homolog -##oplasmic -2017 -alkyl -##gt -arteri -favour -##ocyan -microscope -farm -cysts -currents -placent -##ochrom -domes -catalyzed -traj -analogues -adapt -cess -##oradi -cef -##olone -dispar -polymers -##openia -##ologist -defense -encaps -aeruginosa -promotes -##activation -univariate -rect -vaccin -assumed -myeloid -neurotrans -mediators -english -hall -nacl -pore -silver -applicable -triggered -thrombin -schools -epidemiology -interp -naive -snp -luc -acyl -swe -##orecept -##ucted -kit -cement -waves -branch -functionally -architecture -naturally -##cp -expon -nond -cessation -##otal -surgeon -recall -anis -##antic -cues -gait -para -##cape -##ethylene -randomised -electroly -ckd -staging -osteoporosis -mirna -unexpected -sustain -pay -fos -##keletal -upregulated -##istar -append -ankle -abscess -purposes -##odal -bonding -microarray -examines -t4 -prostaglandin -fungi -chicken -tric -spontaneously -phosphorylated -took -throughput -periodontal -meal -circumfer -coval -served -κb -wistar -nitrate -##ipine -cin -##keys -hospitalized -exclusion -gross -comprising -follicular -140 -testicular -searched -##ylamide -crystals -insurance -gb -##hion -focuses -inadequate -domestic -equations -molar -graphene -glaucoma -##oprop -##eleton -illustrate -quantitatively -bs -kind -##io2 -##jun -scr -pharmaceutical -fox -##itized -##inase -dense -olfactory -##iciencies -##ulsion -clean -locally -mathematic -cro -indeed -emergence -mw -prescription -van -rough -verified -maps -tcr -intim -erythrocytes -ranges -inactive -caregivers -##aminergic -geometry -equip -chel -##elles -ala -nl -pseudomonas -specimen -cerebellar -elucidated -lymphoid -cmv -preservation -nmda -surprising -anomalies -##irc -intense -profound -##omics -entit -##ional -dexamethasone -##etized -acetylcholine -ultrasonography -##ague -##gu -1997 -incomplete -approved -##clerotic -##rozen -achieving -separately -analyzing -intermitt -fashion -kinases -resin -##enin -r2 -enhances -dosing -##acterial -##ino -##oprot -##oxygenase -seeking -##ober -mammals -##oblasts -anch -hearts -edge -acoustic -##35 -vasodil -aspart -aspirin -details -reflux -anc -histologically -mrnas -##ming -##ergies -##ighting -##esize -violence -dilution -epigenetic -ultimately -capability -meeting -juven -coinc -accumulated -ic50 -bund -ptsd -catechol -##cale -thresholds -fri -why -vp -gtp -p450 -neoplastic -##vant -expert -partner -ecosyst -##olarization -oligonucle -coping -tris -colonic -macro -capt -agric -communic -##otub -##ofib -parasites -##antib -front -balloon -antitumor -relaps -decay -p38 -invent -crude -##ellite -##opulmonary -jus -##idae -pancreatitis -frozen -clone -rnas -completion -sar -trace -neo -hind -agricult -pneumoniae -calculation -micror -sacr -##inergic -##aking -enough -enrichment -matching -##inescence -connectivity -vap -##anium -additive -##roplasty -fertility -policies -unst -evol -##±0 -crop -circadian -polarization -d1 -ureter -steroids -##oides -restoration -mscs -conscious -##lot -methanol -transfection -helix -##ivation -fsh -1995 -h2o2 -afford -hous -supernat -hyster -bioavail -##arum -conditioned -microorganisms -pollution -cohorts -nitro -statin -conjugated -labelled -##mal -blotting -##ger -vertebral -c57bl -upstream -mechanistic -130 -generating -progenitor -private -14c -##atonin -alignment -##oscopic -stratified -##atch -september -moderately -relief -##ographical -noninvasive -continuously -anticancer -clinics -threatening -formulations -##thromb -colitis -fistula -hydrog -verbal -alle -reticulum -tk -tone -translational -breeding -persistence -look -promotion -##oric -##f1 -##ionine -##iton -transmembrane -otherwise -principle -##ophore -##ped -glial -viscer -asian -che -bronchial -station -nsclc -##va -##tism -septic -scoring -october -conservation -crypt -##ubicin -scaven -c2 -employing -anatomic -sand -mapk -chemicals -adopted -##afts -neurologic -##3k -##utable -##rontal -ineff -d2 -mex -##key -herpes -opin -ont -seventy -mandibular -##orted -##adren -cigarette -behavioural -dent -ionization -minimally -calibration -copy -##axel -1996 -informed -##ulator -##t3 -medline -australia -palli -usage -interpreted -laborator -market -routin -##requ -cool -canine -##ulos -transmitted -renin -administr -pubmed -##onom -##rex -phenomena -lasting -l1 -ejection -aspiration -subp -resc -outl -endemic -popular -horizontal -catar -river -warm -charged -axons -stabilization -attractive -surfactant -herb -##otoxic -kappab -ecm -##olate -influencing -ganglion -discrep -ratings -alanine -cyclo -##obut -flat -df -##urgical -##olym -photon -simpl -tracking -perox -discrete -##amines -inpatient -trypt -cytosolic -##34 -arising -##mut -orbital -reaching -picture -##factor -antiviral -yl -neuropath -sig -recons -immature -entr -coa -rg -apply -##asm -melatonin -manipulation -sent -epiderm -tibial -mouth -antif -fp -asymmetric -sirna -tet -victim -neurob -dispersion -meet -antibacterial -insufficiency -enabled -penic -lupus -provision -moment -##ounding -##elect -notably -##pert -chd -1h -neuropathy -va -accompl -ole -reserv -##hl -devi -causal -inoculated -adsorb -meat -hypothalamic -##opos -cyclin -pip -##puts -observe -lipopolys -efflux -designated -##igation -##mm -105 -nb -ccr -##esare -suspension -laboratories -##graphy -multivariable -friend -lesser -amelior -pestic -##ffe -triple -ebv -gro -tsh -distric -episode -aph -objects -fn -raman -select -oriented -##ostatin -magnesium -rd -gyr -##osel -##ogenes -##tia -user -modest -definitive -evaluations -recipient -osteoar -normalized -##orative -erk -immunoreactive -##organic -##unk -rule -latent -##osarcoma -palliative -assayed -iu -cellulose -anticip -india -financial -imped -radicals -oxidized -sequenced -##letes -inversely -giving -cab -synov -scler -representation -revision -diarrhea -nal -hh -thyro -sg -photosynth -pleural -novo -pseudo -desc -##esarean -underl -##patients -incons -##arct -reproducibility -astrocytes -diaph -attributable -criterion -dysplasia -densities -guan -attenuation -forest -##aturated -granules -##asts -##tra -peroxidation -pari -aggregates -gst -adding -owing -##recip -##cy -##rag -nation -##met -hsv -directions -##ayers -erg -extreme -calculate -standing -lamb -covari -incision -##epith -haemat -grafting -uncertainty -##t1 -intermedi -micron -arise -##f2 -##yrin -cfu -releasing -gad -monkeys -##icul -##ecs -intermittent -recognize -validate -plasmin -fetuses -powder -##ito -compatible -destruction -establishment -migraine -##iatal -transplanted -leukocytes -neoplasms -becoming -disturbance -foreign -code -immobilized -deprivation -g1 -##static -avoidance -lumen -consistency -##ards -16s -grav -rac -cranial -##terase -nucleotides -hypoxic -hemodialysis -articular -dcs -##agement -sac -postpart -pigment -caffe -inserted -mathematical -striking -autonomic -incid -perinatal -conductance -converted -evolved -anesthetic -##tructural -contrary -unable -##enteric -fragmentation -cholec -kr -exert -postpartum -fecal -analgesia -##avir -arach -ambient -candida -##aly -##urable -##p3 -##thral -deterioration -vibr -##otherapeutic -kerat -condens -aneurysms -upregulation -abilities -transitions -discl -losses -massive -##oblastoma -suffered -prove -universal -ethical -remarkably -mas -160 -conjunction -amn -replaced -joints -bacillus -proce -##ogeneic -quantity -apprec -crani -shed -echo -##n1 -marginal -##ensitivity -conditioning -seasonal -switch -sampled -erythrocyte -dial -believe -athletes -c1 -abnormality -##uretic -##uck -constitute -##edical -alumin -cooper -##aryng -ome -##no -hosts -arc -parti -##g2 -##ova -##points -arabid -sharp -fdg -inver -##panic -penetration -alcoholic -multim -modify -trem -##67 -pharmacokinetics -##uric -csa -tbi -##but -inventory -din -radiographs -ligation -##enth -conceptual -correctly -lib -arabidopsis -cochle -drawn -lectin -spi -inorganic -##romycin -myocardium -##virus -historical -ded -microp -running -##oemb -dysp -lysine -thorough -##imb -cognition -describing -belonging -preclin -##usp -til -held -specialized -usa -striat -spectroscopic -##itors -knockdown -stra -flexion -masses -peer -autoantib -##olid -##omerase -filled -coil -expend -##zation -cannab -##anger -hif -immunob -mediating -polyc -proportions -wnt -ambulatory -catalyst -##uz -tyr -##bilical -##urnal -##floxacin -##plastic -avi -reward -equipment -##iliary -aspartate -developments -##antom -fibrinogen -##c1 -unstable -ingestion -tam -##essive -elective -##r1 -sy -gained -sarcoma -glu -##imus -glycine -preclinical -segreg -disulf -etc -mini -peroxide -splen -unp -##itated -##ares -envel -conversely -job -##luor -forced -facilitated -ovary -nephr -brown -phospholipid -eighty -##ifug -expendit -##gene -##qol -contractile -ih -giant -hbs -##pin -##ostomy -autophagy -cros -##olum -trigger -similarities -autops -swed -##p2 -vestib -interacting -##sk -lithium -##rospinal -1994 -resusc -neighb -scin -contacts -111 -##lipid -reproducible -takes -##ellum -eleven -heme -1a -fel -commercially -subsets -terminus -##entan -inoculation -vc -clustering -august -attempted -bott -contractions -trypsin -courses -slower -106 -fluids -name -visualization -essentially -sial -ai -mdr -colonization -visceral -soils -##enstr -##ozoa -aware -##after -800 -testis -aberrant -hypotheses -capsule -acth -##bumin -incontin -##itability -fibres -radiol -##ibilities -noradren -prophylactic -introduce -##iefs -cholinergic -immunosuppressive -balb -sole -batter -bioavailability -poorer -inconsist -##ectomized -phosphorus -##atation -125i -rings -unlabelled -bh -constructs -receiver -resol -slope -institutions -carry -knockout -overlap -fair -lipopolysaccharide -cerebrospinal -methodological -asd -postmenopausal -facult -fluctuations -gnrh -reflects -immunosorbent -glycogen -advanc -doctors -450 -##rupt -##imetric -##48 -##uding -##ecan -leakage -##aldehyde -chim -smoke -routinely -varies -pulses -institutional -autosomal -precipitation -##95 -revascular -transverse -1β -##ogenicity -##acing -subpop -##irable -##illi -jejun -h1 -irrevers -beliefs -chromatographic -moiety -##hn -explicit -anten -defective -##oxif -modulating -##ony -##oblastic -##pc -##odium -tachyc -thr -phantom -establishing -atherosclerotic -ethylene -cauc -liposomes -##epinephrine -photoc -vl -template -chit -limitation -summarize -##ilon -insect -oncology -distances -##38 -bw -agar -fermentation -exploration -##onduc -wastewater -canada -umbilical -encodes -kinds -##inance -droplet -cephal -homogen -gv -##odg -energies -##etamine -responding -norepinephrine -promise -dramatic -entity -spot -allevi -deplet -##bean -responders -##angements -eliminated -transforming -integrin -##benz -cosm -##hed -missing -daw -dut -genital -porous -disl -explanation -pad -slices -attending -arb -moving -intention -##nd -##regular -quanti -warranted -ccl -machine -nav -juvenile -pup -distinguished -gt -mention -los -ea -##eled -highlighted -deline -glycerol -surgically -parietal -phage -meg -##rolif -##lit -##psych -##75 -##utin -relatives -reproduction -##tified -caucas -intraperitoneal -##ronectin -##opia -corn -slowly -pth -irregular -wil -blockers -geographic -evidenced -##oprecip -fluoride -##omegal -##ura -seeds -2a -coex -caries -wounds -partition -dg -reflecting -resident -preferentially -determinant -interpret -yell -apart -averaged -ipsilateral -cascade -myosin -##ulsive -gfp -periodic -asper -west -electrocardi -biomedical -ammonium -monocyte -anaesthesia -##atosis -##idazole -allergy -regarded -attend -##ius -##allow -drink -apo -prone -##39 -##ygd -##entists -ses -##ocompatibility -caroten -ectopic -006 -cataract -##thood -##ble -prolactin -nig -maxillary -scaffolds -minimize -##osidase -##orubicin -analgesic -corp -pollut -dopaminergic -pci -plex -submitted -leu -adulthood -obtaining -endometri -##flur -isoform -##itig -##atr -##rowth -neuroprot -tick -aryl -##ilar -manage -psoriasis -scaffold -regulators -hydrocarb -##izumab -allogeneic -propr -wear -lymphatic -trna -##acil -amygd -##ylic -allo -cardiomyopathy -##oconstr -drive -##environ -poli -optimize -counseling -##ococci -apparatus -##vem -ibd -##ida -keep -##rof -affective -augmented -##ophan -##omyc -101 -mitogen -ganglia -schedule -titers -catenin -catalys -osteoc -prefrontal -toc -folding -extremity -comorbidities -codon -saliva -cot -mimic -tin -##eces -##45 -multidisciplinary -##amer -1980 -prescrib -proc -consumed -household -respiration -ulcers -poison -dsm -##oreg -mesh -cadh -plans -glycol -trap -cycling -iodine -estimating -##ele -sensors -termed -survived -mycobacterium -flexibility -##oir -named -effusion -accordingly -##epithelial -carcinogenesis -century -prompt -##inder -integral -finite -ascorb -starch -striatum -##ishman -##ensitive -preferences -silencing -##psychotic -##acrylamide -##onomy -##uled -parathyroid -participating -yellow -naf -##roid -endoscopy -leaders -eukaryotic -d3 -repetitive -piv -prosthesis -incontinence -transc -homozygous -cyp2 -hypothalamus -automatic -mrsa -osteoarthritis -volunt -mk -##cm -lid -##opress -polymerization -chiral -damaged -sensitization -triglyceride -##nk -irresp -##apine -veins -intraocular -weakness -vom -lowered -##pi -surprisingly -##rogens -heating -dismut -fabricated -rose -##ocept -virtual -coherence -104 -stranded -emissions -fus -solubility -neurodegenerative -confined -constraints -continues -##onyl -adjunc -connection -follicles -circumst -demands -cust -microenviron -played -government -primers -mosqu -##inates -gingival -trimester -cac -##uvate -##enyl -disad -##63 -probable -circuit -thereafter -facility -delt -streptococcus -hba1 -biologically -permit -glioma -##oplastic -denat -card -##uing -iop -physiologic -confocal -detach -##aver -vulnerable -perh -rationale -gol -##otrophic -intens -macular -lengths -##mium -1992 -##uli -##unctiv -fruct -1993 -disadvant -ammonia -##ectic -mism -desired -splicing -terr -copies -influx -##iffer -##opus -electrophysiological -##ubin -acceptance -arthroplasty -swim -microbiota -typh -##aid -angles -duodenal -cip -flavon -aller -##ola -infarct -nick -##yclic -##affin -##acyl -cardiopulmonary -bolus -mcf -##42 -ko -123 -switching -adrenocept -bacterium -corticosteroids -classic -##iosis -fev -aeros -flight -dioxide -##99 -stabilized -catalase -structurally -antipsychotic -flank -profiling -cyclase -108 -complaints -##ecyl -shunt -proinflammatory -sulfur -themes -eventually -allograft -##hep -ll -consent -nucleic -pod -##01 -trab -novem -wean -centres -immunotherapy -anastomosis -##ivity -paed -genetics -east -irrespective -uro -perhaps -chemot -winter -legal -rif -antigenic -##arboxyl -dod -america -presumably -##imp -##echanical -metric -pollen -##pyr -103 -fertilization -c4 -propagation -diaz -##psia -november -anthr -undet -seemed -horses -microvascular -cochrane -sprague -nac -assembled -dawley -reservoir -##mitter -##55 -percept -##brain -sized -tachycardia -fourier -gynec -mercury -window -mind -##aker -microl -filling -brady -##omial -engagement -boundary -epitopes -perfused -metalloprotein -leishman -##olet -today -get -helical -thrombocyt -bb -excitatory -stain -hcg -acup -plaques -hyperp -planned -detector -mosquito -##abine -bacteri -waters -trunk -##inting -slud -##arium -remote -##diagn -senes -placenta -helic -hydrochlor -reversal -hodg -##osites -102 -multil -incorporating -##uclear -ninety -##odegrad -accounting -apoe -ethnicity -##o4 -axonal -nephropathy -dismutase -papillary -phenolic -progressively -angina -anticoagul -colonies -persisted -cd34 -##asi -abc -ubiquitin -ppar -##a2 -hazards -roc -committe -sag -zones -##ettes -007 -considerations -kh -fulf -##ophyll -straight -modif -##dd -crystalline -triglycerides -organizations -internet -motivation -pretreated -germany -inherited -binary -nanom -##glycer -scored -cadherin -##icillin -m1 -epitope -productivity -febr -accessible -salv -tai -dest -autism -##hypert -##otropin -splenic -arrays -quadr -zebr -hodgkin -##ielding -dair -epidural -instruction -##ince -mitotic -spherical -aβ -judg -##yroidism -medias -##enzyme -inhalation -plates -gray -hba1c -##abolic -wm -adhesive -f1 -vomiting -##adj -vocal -##ozo -subd -symmetry -hi -psychiat -##ografts -biologic -microv -clinicopath -transferase -multicenter -blocker -spent -filament -views -##retro -fifteen -hygi -##amph -earth -##iac -tradition -chitosan -repeats -papers -fibronectin -prostatic -caffeine -##olding -##irubin -spir -immunized -microd -antiretro -render -february -compan -bull -perspectives -rhiz -admissions -limbs -lb -septal -rely -serological -dissolved -tryptophan -suture -##opter -##arial -hyal -prevents -abl -##roliferative -##monic -acs -organized -positivity -tro -paral -messeng -##itten -challenged -briefly -elongation -wood -fate -cdc -asperg -inapp -ka -ago -carefully -rhod -overlapping -engineered -##lyl -advance -menstr -intended -arachid -##lementing -##omod -idea -islet -malformations -nonspecific -prl -analogs -##itudes -walls -##mission -immunost -##borns -addressing -hyperglyc -pulp -##ropri -experts -eastern -tol -##ortive -pulsed -phon -1991 -isoth -prednis -nadph -008 -san -individually -coordinated -excited -mitig -turb -scenarios -tea -summer -dairy -cris -curative -viscosity -cough -omega -collectively -cadmium -arms -tong -tpa -titr -##flurane -##pe -big -##ochemically -hered -##tron -curric -retrieval -sludge -lineages -defining -differs -accid -twin -stream -##obs -circumference -personnel -thymus -motifs -remove -##x2 -split -ie -spind -vasopress -cerebellum -##icans -ff -##otroph -inappropri -conflict -##tir -endoplasmic -mesenteric -immunoblot -p1 -impairments -designs -##onazole -vt -##parts -analyse -extrap -intravenously -oz -pdgf -##ply -constituents -newborns -gem -reasonable -bioactive -##lements -resorption -depleted -emg -ophthalm -fh -exhibiting -programmes -thromboemb -dermal -##inge -##rogenic -aor -cyclospor -cholecyst -methionine -hierarchical -##entary -lands -autopsy -haplotype -##icidal -indian -links -myeloma -faculty -##amil -g2 -intermediates -naph -expectations -##eria -##66 -gels -ice -australian -vig -dynam -##ardial -fused -##onym -mpa -categorized -trees -nitros -accounts -rigid -##arse -glucagon -ultravi -nutrients -ultraviolet -resected -mentioned -mang -pals -kept -conductivity -##bow -matric -##ucl -ment -##exin -dust -explor -gaps -resuscitation -familiar -bases -doxorubicin -##fluor -gf -spite -antifung -##fish -##obarb -ribosomal -spring -calif -##edema -revascularization -heterozygous -dead -cerevis -semic -killed -quantities -arsenic -chr -disappeared -inherent -manifestation -counterparts -polyacrylamide -synovial -relating -spermatozoa -immunocyt -notch -cea -hypoglyc -compartments -nemat -lives -monolayer -handling -interviewed -##thy -pec -cationic -##b2 -lactam -vast -trabec -xanth -immunoprecip -bmp -descending -##hal -cerevisiae -intubation -chemically -stresses -infertility -falls -##obacteria -antiretroviral -pmol -isotope -connective -anesthetized -##amel -strongest -sacch -fractional -contract -hypersensitivity -foci -##glutin -occasion -stationary -meaningful -cit -lucifer -lean -##osides -electrostatic -publications -suggestive -icd -killing -predicts -publ -theories -##iers -lign -solar -##obac -laryngeal -##epam -empt -endotoxin -##aved -thermodynamic -cub -pge2 -built -##idic -131 -fmri -mixing -##mv -preserv -##erated -differing -competence -collaboration -discharged -concentrated -pah -dv -planar -grain -voluntary -##hepatic -vestibular -tot -##eption -plasminogen -circumstances -##amb -stone -branches -titanium -promoters -copolym -oxal -rain -mating -kary -summarizes -glucocorticoid -##eries -leucine -saturated -disulfide -island -inputs -pw -enabling -began -synaps -chlam -##aching -##odontic -##enal -converg -tracheal -feel -##stem -107 -shortening -irreversible -nause -hydroxylase -##cein -reacted -mot -##fil -##88 -ultrastructural -decomposition -##47 -hemisphere -kl -##59 -coast -eros -##ographically -symb -junctions -##ife -plasmids -myelin -build -##ocycl -tof -##ophosphate -causative -##ented -##amino -nanotub -gyrus -centered -bdnf -lambda -pacing -shifted -demonstration -illustrated -lf -seal -##tracted -sexes -sexually -province -nausea -aeti -employment -ae -th1 -americans -##ift -polymorphic -annot -hypotherm -provider -##iding -nom -computerized -passage -rhyth -soc -sah -eigh -macrom -imper -californ -##ously -teleph -cyl -##itance -centrifug -##onit -larval -facilitates -scinti -##izer -cm2 -datasets -##oate -##onv -innovative -##agglutin -cations -##endocrine -dermatitis -biotin -##feeding -foss -##tium -##x1 -##acycline -neuropsychological -silicon -##ifferenti -109 -page -thymidine -perceptual -##olateral -applicability -##obenz -bath -managing -paediatric -endovascular -##orh -gelatin -stimulates -projection -colour -##flav -transporters -exten -retardation -retrograde -##amily -embase -dihydroxy -actively -suffer -dib -hispanic -antidepressant -connections -modelling -arrhythmias -##ocking -abortion -clonal -##32 -mapped -dissolution -sufficiently -##azone -butyl -gradients -proteolytic -##opr -microsph -orders -albicans -volatile -retinopathy -chip -mediator -executive -nic -1beta -crosso -precon -rbc -envelope -geometric -converting -##ience -pi3k -vo2 -reinforc -endpoint -batch -written -ww -minority -ri -aldosterone -fibre -prp -allergen -deletions -elast -bup -diaphrag -oscillations -calcification -##phasic -hernia -preoperatively -##68 -mucin -agricultural -##dh -##tigm -##wan -##asal -relate -maln -phil -reciproc -carp -efficac -vill -surveyed -scc -moun -bsa -##romagnetic -supportive -##olipid -calor -topic -radius -##gic -hereditary -characterised -percentages -##sc -habits -subcellular -cytology -bones -meningitis -behind -##o3 -dataset -amygdala -burst -hypotension -osmotic -atl -esters -##unting -##econd -serves -termination -##ritin -oocyte -consuming -lic -rho -diagnose -virtually -comorbidity -acl -antihypert -haemorrh -solvents -asymmetry -##oler -firing -cytogenetic -lived -unlik -##oac -cutoff -ami -participant -grass -##ido -methoxy -##entric -##idone -guideline -attacks -porph -antifungal -##44 -##mcs -quickly -unlikely -sports -continuing -restr -##traction -flag -choline -hemorrhagic -##fc -matrices -players -electrophore -propensity -als -degenerative -indicative -fifth -##ipr -imbal -##amidal -rules -consultation -neuromuscular -##erate -ber -faecal -epileptic -##ineal -##onectin -brains -glucuron -seconds -underestim -enamel -##rug -mcp -ordered -fv -##oxicity -extin -lattice -phospholipids -micromol -soybean -clinician -extrac -lew -axon -renew -sacrific -retr -##utrition -accomplished -intellig -cloning -boy -adenoma -aquatic -##rotomy -ldh -modifying -cerebro -autoantibodies -trypan -pock -tropical -fibrous -aux -t2dm -##ontrol -confirming -cleft -b1 -##afish -##berg -puber -luminal -##inical -french -beta1 -citrate -microw -##odic -zebrafish -##ocaine -##version -microenvironment -sedation -dysreg -biodegrad -th2 -ngf -endocard -adequately -contempor -indometh -rip -exceeded -hela -##arynx -cone -trif -##33 -dominated -bridge -epsilon -##fo -follicle -fulfill -##h2 -tongue -radioimmun -µm -publication -##while -kcal -##eties -reconstructed -verify -sharing -conjugate -bax -skull -asthmatic -demographics -indomethacin -##opre -telephone -phospholipase -pell -district -##ondyl -apt -displays -semen -vesicle -tio2 -clock -succin -oligos -revised -reagents -crf -dichlor -##cd -fire -enlarg -crossover -sporadic -adenovirus -glutamine -spik -thous -deposits -priming -intensities -##asome -screw -impedance -##tification -opinion -##ersion -115 -office -grades -match -dilatation -intron -curv -osa -infused -##tructures -her2 -ew -##obulin -mumol -embolization -atmospheric -##opyran -##alin -nicotin -behaviours -creating -adenomas -hydrophilic -##atinum -bayes -sput -perp -##omyel -exha -investigators -apolip -##plantation -##oms -falc -thiol -anova -electrolyte -segmental -##tituted -retrieved -absorbed -activates -calves -nodal -amine -##ovir -expand -##ois -committee -fourteen -vaccinated -troph -preceding -##inform -isomer -deox -scope -confound -scaling -##xa -immigr -multiplex -inappropriate -##onn -sigma -foundation -##ohex -imag -vasoconstr -covering -inactivated -stepwise -apnea -mediates -ends -apolipoprotein -exploratory -augmentation -definite -crohn -pyruvate -departments -posit -##romyces -##rost -superv -weakly -bv -messenger -filaments -primer -morning -ineffective -sequel -debate -ecl -palsy -neuroendocrine -painful -divergence -palm -1988 -perforation -##ophosphamide -cck -aq -p21 -oxygenation -faces -accommod -cooling -monthly -##oglycan -curc -geographical -islets -golgi -##atics -1989 -ling -corpus -amput -luciferase -polycl -##m1 -##abd -inhaled -neighbor -constitutive -ord -tumorigen -oxyt -multif -interfere -graded -downregulation -whil -recessive -temporary -burs -asia -confirms -actually -##oal -epine -folate -##role -alp -singlet -concerned -traffic -epic -arrangement -whilst -hands -locomotor -##anolol -mobilization -##urgery -hamster -livers -tors -compact -away -gather -blinded -paclit -prosthetic -adsorbed -antihypertensive -exerted -breastfeeding -stimulatory -degraded -n2 -##inted -##istrib -paclitaxel -##ulas -pathophysiological -habitat -possibilities -acceptor -discriminate -lipase -schw -ctl -quench -transcribed -terminals -eliminate -selenium -##omed -vanc -expenditure -urgent -nanoparticle -cochlear -bn -surrog -monotherapy -reflection -derive -adiponectin -cpg -angioplasty -##etrical -fum -mun -##uge -bench -##d1 -tubes -substitu -salts -cast -##uloskeletal -immunocomp -typing -sphing -rum -##otom -lysis -extending -radioactivity -genotyping -regurg -heavi -137 -##plicate -segmentation -deformation -penicillin -learned -duplex -sug -maize -relig -##afil -bilirubin -sbp -initiate -mutagenesis -polyethylene -lymphomas -atopic -##aromyces -projections -reagent -somewh -embolism -##vation -physic -brainstem -natri -confounding -##yel -##osper -elicit -cyclophosphamide -microsat -expiratory -entered -nonp -nif -percenti -accordance -reliably -strengths -bus -abd -##asp -dan -cesarean -journal -ether -gent -##ificant -musculoskeletal -acin -##orbol -diameters -##uterine -chemokine -##itating -taiwan -afferent -conjunctiv -ferritin -h3 -emphasize -waist -king -mec -capabilities -mds -unex -chance -##ofol -##orac -claims -##ompass -sided -epidemiologic -transpl -helper -##esophageal -priority -##inos -vasopressin -regime -differently -##oreceptor -deviations -allocated -128 -nail -encompass -bim -noct -mutated -##incter -vulnerability -mtt -immobilization -##anin -sixteen -##mann -c5 -synapses -##ka -ubiquit -striatal -interfering -lavage -rodent -intramuscular -stents -##otopic -lactic -##ading -##k2 -spike -sensitized -pivotal -##oprotective -##alanine -breakdown -loads -monomer -##ilis -##ona -##36 -platinum -##ethanol -propranolol -deposited -inconsistent -##ignificant -nitrite -##titude -009 -adolescence -##aplan -129 -balanced -battery -immunosuppression -erythemat -seropos -noradrenaline -##ulators -##aterials -inositol -taxa -##ete -catheters -uterus -##ussian -hmg -soon -esophagus -ultrasonic -240 -##odend -negligible -outbreaks -clot -proteinuria -##ipped -tracer -rm -##aminase -thrombus -##otechn -##77 -unres -sut -pathologies -thrombocytopenia -originally -##agr -cytosol -morbid -fluoro -kaplan -latin -µg -agreed -confirmation -summarized -granule -microglia -##osum -tear -varic -forear -mtdna -sector -##odi -electrophoretic -plot -dendr -polyclonal -iran -exponential -##artan -icp -california -##ushing -##ney -stratification -exec -somewhat -experiencing -successive -antip -totally -benzene -##traum -dehydr -taste -dried -grouped -##aced -medicare -operational -h2o -##coplas -vertebrate -warf -veterans -dissemination -duc -occip -doped -2b -##ophysical -rcts -chart -##gluc -positron -plexus -##etine -apc -argue -thirteen -##urium -diver -aerosol -dpp -extinc -##lutin -##opropyl -disabilities -antisense -##astric -avian -biosens -purs -##nes -immunomod -avoided -births -humoral -##lipidemia -##zymes -##000 -propofol -femur -heterozyg -##illa -menstrual -urolog -##ods -specialist -syr -particulate -unfav -spindle -diagnosing -downregulated -multidrug -##oney -##46 -safely -hop -ee -hrqol -116 -canadian -antioxidants -endometriosis -prescribing -##urons -fabrication -##hex -traditionally -118 -chronically -mg2 -##ersonal -vitre -##ocyst -refractive -vw -assistance -detrim -scd -nir -##keleton -##place -stack -vapor -harvested -##ittal -schist -dft -positioning -nct -fitting -register -gradual -peroxis -facilitating -##oti -acupuncture -##emporal -sacc -conducting -wider -phospho -lysosomal -medicinal -cerebrovascular -jnk -indirectly -differentiating -##enem -immunoassay -gps -invari -chlamyd -cosmetic -agglutin -ok -rodents -##estr -gallbl -c6 -aggression -hold -altering -##31 -periodon -hyperglycemia -spatially -dysph -immunostaining -##urin -epinephrine -raise -##electro -cow -custom -contemporary -italy -attributes -chap -##opathology -hope -caudal -##idol -viewed -inequ -interventional -zeal -##onium -##timal -attitude -come -board -killer -nocic -sor -travel -##ozyme -discontinuation -fructose -135 -##oxidase -carbox -hole -crt -##anic -borne -multis -table -keratinocytes -pma -sheet -phi -synthet -##glycerol -preec -lactation -beads -conjugates -bring -union -physi -qp -meier -##utely -##nary -interd -intrap -notion -clients -korean -##bone -genera -restore -##odiaz -paf -biod -deformity -fungus -pyrid -##imaging -##ofacial -##amma -disrupted -##thrombin -routes -##acaine -##yx -somatostatin -zealand -##vastatin -medicines -##orated -chickens -mirr -covalent -normot -racial -sediments -acceleration -wrist -falcip -neutrop -hap -##bred -mtor -1b -moist -deterg -fb -hygiene -hypog -deploy -##oea -126 -neuroprotective -dq -spots -originating -bromide -glycosylation -depolarization -semantic -unn -unexpl -pollutants -county -##xr -ingr -lymphaden -stones -intercellular -pmn -voc -fen -edta -##ita -e1 -chemotherapeutic -##formin -2018 -senescence -##odem -amongst -granular -fitted -entrop -neutralizing -##ecific -exerts -##urea -cuff -##onitrile -angiogenic -implementing -b2 -margins -##rexate -##rio -morphologic -dentin -codes -piper -proteases -cgmp -##aran -train -accident -neoplasia -interacts -sphincter -brought -inversion -endpoints -timely -##ylase -1987 -##fall -##oxane -scenario -military -suicidal -##olecules -##ustion -desirable -floor -iqr -##atectomy -##atite -addiction -digestive -worm -rearrangement -##erum -designing -polysaccharide -aminotransferase -ultra -stretch -falciparum -dextr -##inesterase -creatine -convenient -incremental -hn -hcl -overload -trache -##osamine -intrauterine -revealing -##pation -erad -112 -compromised -##adm -##98 -diffus -##rophot -solely -prepare -simply -couples -poisoning -functionalized -presentations -gallbladder -calf -asa -src -hydrogel -ran -yielding -assumptions -flaps -psychosis -mag -extinction -##ishing -aberrations -acetic -ik -linearly -ingred -neuroblast -##onuclease -##arinic -##bir -postin -kil -visualized -functionality -proof -##ropo -##urations -hes -desorption -pbs -kv -##growth -oce -drivers -pharmacy -fractionation -endoc -nickel -judged -sheath -hepatocyte -intracere -repress -patency -harmful -lock -rotational -##opher -efficacious -##lv -##mo -grading -bank -transcriptase -carbonyl -curriculum -arachidonic -proline -neoplasm -biomechanical -compensation -dot -catheterization -cxcr -sav -caucasian -corticosteroid -disseminated -##ortical -magnet -contribut -origins -appl -toxins -recurrences -initiating -france -tubules -##otrexate -mtx -forearm -##omethyl -polymeric -##hyd -tailored -unid -hydrochloride -nn -entirely -monte -washing -1α -##ationally -plots -bioinform -sequelae -β1 -##ecess -overexpressed -echocardiographic -trajector -benzodiaz -metformin -iliac -diagnostics -organizational -haplotypes -##umb -angular -trajectories -incomp -transpar -physicochemical -methan -modeled -tat -stock -polypeptides -york -dens -##grav -tubulin -comfort -1985 -##opharyngeal -##gd -creation -anter -enhancer -nich -rescue -histopathology -pfs -subf -oligonucleotide -possesses -methotrexate -vancomycin -##elia -##iate -bayesian -insensitive -bilayer -serotype -manifested -titer -##atric -dodecyl -spanish -covariates -deleter -##iab -##isely -intrath -smad -geometr -lake -diphenyl -histopathologic -##agia -plur -##wards -170 -proximity -##iently -beds -postsynaptic -lactobac -cue -paraffin -deleterious -145 -radiolab -##ipping -##ldl -microsomal -generic -amide -insects -ceramic -die -distinctive -specified -identifies -coexist -##orin -ics -icam -parenteral -funding -radioimmuno -freezing -##iesis -ascending -airways -tx -saccharomyces -spreading -heterologous -##otent -selectin -electros -kinematic -encode -ivf -aga -##amen -fruits -counting -warfarin -##2b -meanwhile -hbsag -bundle -exons -bios -gains -spectrophot -enlargement -pacem -circuits -disordered -aki -excluding -rays -postulated -curcumin -##aterally -eradication -m3 -avoiding -endothelin -detrimental -supplements -hct -ascites -##yrene -prev -broader -plateau -cobal -##je -deng -buc -##amed -##ican -discom -amplitudes -forens -definitions -weigh -pars -expensive -instances -microwave -dilated -imply -##meth -collaborative -cytoskeleton -pdt -benzyl -microspheres -permits -homes -pesticides -elab -salvage -##istribution -##ipt -##otropy -schwann -paid -voice -adducts -regurgitation -delays -##kinin -tons -consolid -felt -phenol -##oracic -realistic -contexts -tablets -##ibrill -intakes -discipl -occasionally -##oreal -supernatant -misc -erythropo -hypertherm -##58 -##flex -catalysts -implies -stroma -abdomen -documentation -disparities -advers -plasmodium -lipoproteins -insectic -trade -carboxy -ischaemic -laparotomy -##brids -regularly -recycl -equival -microsomes -pas -venom -cbf -alternatives -advice -carbam -radioactive -excised -114 -develops -bcr -photosynthetic -era -corresponded -robotic -oval -dengue -euth -gag -pgf -interactive -gfr -##vac -fuc -##atalytic -aur -##rep -vip -happ -girl -concordance -isomers -zno -energe -interm -pai -works -helps -assembl -nude -sciences -##ya -adoption -odor -tv -##oneph -1986 -borderline -carlo -microtubule -accumulate -lamina -axillary -metallic -##inin -##yrate -adipocytes -subtil -freed -##ocysteine -##infected -epidermis -ecosystem -topics -accessory -inability -##ofen -##umental -sna -quer -fev1 -arous -117 -##osensory -allerg -ductal -corticosterone -##chol -glycoproteins -intrad -##head -remed -selecting -backbone -##rolimus -antiserum -posttrans -dha -allocation -rock -##n2 -natriuretic -##agg -captured -pka -cadaver -##itabine -aspergillus -discomfort -sagittal -mets -http -failures -infusions -cholangi -crossl -synthesize -style -nap -interpl -##odyn -rej -constric -swine -probiotic -vm -pcs -stoch -##li -ances -projects -##oxifen -##pd -infrequ -msc -opt -##inyl -illnesses -2h -mca -normalization -glycemic -ascertain -trafficking -residence -amylase -entities -depended -len -sputum -decompression -##oking -therapeutics -doctor -bid -constitutes -##ivacaine -##imes -subtle -##tiles -##rist -chemo -gw -##nam -ix -subclinical -format -unnecess -myocytes -metaph -expanding -deduced -suspic -amorph -anticoagulation -ageing -brca -malnutrition -aggregate -postural -representations -micelles -resultant -cleaved -chimeric -ja -relies -eighteen -elbow -pharmacologic -cooperative -phagocytosis -##78 -a5 -##omyelitis -spain -##ua -vibration -precisely -orthop -subtilis -exploring -fluorescein -bottom -forensic -mabs -sociodem -psychotic -regenerative -security -choices -physically -willing -##ichi -outflow -124 -filters -calmod -cup -phorbol -ash -350 -##ximab -bill -synthetase -##aind -##eptidase -##atization -reserve -docking -##ativity -simulate -programmed -oxytocin -jaw -attended -##cell -paradox -##respons -mci -utilize -chondrocytes -allelic -detachment -rcc -periodontitis -##olol -polarity -parenchym -encapsulated -##adh -##ocysts -sedentary -histidine -teams -existed -##iest -parasitic -hydration -neuroimaging -urethral -analytes -progenitors -uncover -module -gated -iter -##inery -stool -polarized -210 -anions -necessarily -removing -##quine -quasi -##anz -semiconduc -ign -properly -pcos -patches -calmodulin -##ky -bcg -hyperch -tomato -sociodemographic -escape -hyperactivity -##acrylate -competing -bruc -substitutions -##ullary -##ulsions -bms -##edge -thickening -##thetic -sides -rl -polyps -oss -lymphocytic -##eremia -icc -hysterectomy -1alpha -underlie -trib -incorporate -afl -##eptic -monkey -crow -monolayers -1st -hv -nox -mosa -dextran -##oni -consumers -ulcerative -lidocaine -13c -geriatric -tocopher -##ulae -suspicion -oligodend -##abular -granulocyte -tumorigenesis -surviving -##eted -macroscopic -velocities -chf -solitary -title -isopro -190 -##noid -nocturnal -##eced -trail -##ietin -intran -##ordant -angiographic -bk -##itonin -rising -employees -igfbp -##rofloxacin -hepg2 -narr -##69 -##omere -boundaries -anticoagulant -ovari -bub -conditional -normotensive -##41 -##iated -disk -observer -malt -##vive -gg -duplication -cured -##trin -proliferating -encephalopathy -##inement -##elle -necessity -alive -pde -dependently -polyphen -italian -##graduate -scintigraphy -spray -dilation -hypothermia -biphasic -electromy -##iles -##onical -##cetin -pocket -adenine -preferential -microsatellite -tunnel -pon -##aa -terat -bact -cultivation -spondyl -transferrin -micrornas -adrenoceptor -##andial -stochastic -composites -emphasized -hrs -unfortun -qpcr -pyramidal -diethyl -modulates -vibrational -##irection -hybrids -outpatients -hat -##ball -##ophilus -anthropometric -stressed -imbalance -disappearance -intravascular -overt -victims -explores -muscarinic -contraceptive -##tise -theoretically -amniotic -attain -shapes -columns -tightly -##yz -cabg -##uan -performances -##mus -promp -satisfied -configurations -##anese -##hr -eosinophils -sto -cha -##lyca -detox -analogous -specialists -que -##ussis -swallow -interplay -ache -##akers -ethics -caution -##acrine -engaged -breaks -cass -##oactive -surrogate -tlr4 -rsv -##borne -##iii -##ievement -##urt -maze -f2 -stake -##amicin -teachers -instrumental -tuning -##asting -necrotic -labelling -##aign -enlarged -vasculature -##c2 -##62 -##treatment -occipital -babies -immunoprecipitation -113 -hpa -deficiencies -700 -whereby -integrating -##iguous -spaces -korea -neuroblastoma -##inositol -119 -ut -presumed -122 -rms -lenses -survive -##iders -illustrates -agency -thio -modulus -reactor -morphologically -tolerability -road -##ifier -scann -polyn -ease -##glut -conflicting -##lampsia -cooperation -##73 -microbiological -freedom -##onine -epo -##du -feb -dispersed -subpopulations -cryop -erk1 -##enoid -hypoglycemia -dislocation -palmit -vib -##kary -stat3 -##oke -observers -##alent -generates -hematoma -preeclampsia -aspar -achievement -nonh -nafld -campaign -drought -npy -compensatory -##arter -frail -alph -neurotransmitter -interpersonal -electrons -tamoxifen -##achlor -##osex -uric -synapt -gvhd -##omyces -erythematosus -pater -percentile -capill -intellect -weaning -vertebrates -glp -integrate -potentiation -##idym -procedural -cardiomyocytes -##umination -parenchyma -segregation -appra -chaper -nanow -ich -intoler -freeze -unnecessary -##jug -##quis -coordinate -amorphous -loops -siblings -rearrangements -corresponds -evolving -##itate -emt -ppi -adherent -intersp -disab -generations -rigorous -##ulins -##enoic -##ena -bright -mdd -transplants -atax -wire -##opathies -##eliac -##ourse -sma -##o1 -sox -##izable -ozone -##yric -##ece -##insulin -pyrim -##ocortical -reass -lncr -##osom -##band -##ml -twins -sulphate -surround -##arbonate -neovascular -##ectory -##obacterium -obstac -septum -mich -##list -attentional -methane -casein -pertussis -dyes -cxcl -indexes -##orax -##4a -##oidosis -landscape -mad -simplified -025 -bag -herbal -trajectory -##atology -scalp -scavenging -endurance -##ophilia -biosynthetic -radioimmunoassay -reciprocal -archae -nalox -hypertrophic -preceded -##imide -basement -smear -cw -perfect -oxidant -##icip -brachial -fly -formalin -qualitatively -favourable -uve -chapter -vegetables -approximation -rpe -technological -menisc -dimers -worsening -isop -midw -medullary -bending -##h1 -sj -scheduled -quantitation -acidosis -##igenous -pahs -psychometric -bonded -epididym -investigates -oxy -fuel -##ulent -sigm -sparing -intramolecular -extremities -galactose -##obi -quenching -##79 -operator -##ritis -incis -comprises -germination -##ystic -cyclooxygenase -##obarbital -talk -##itter -circulatory -jak -collateral -grew -exceeding -aluminum -fingerpr -libraries -stressors -hsp70 -statins -##ortem -triggers -##test -mosaic -introducing -##igenic -weaker -replace -haemoglobin -##ji -3p -juice -##d2 -##worm -suppressing -gonadotropin -roll -##ewise -amen -resolve -encouraging -uncontrol -newer -malformation -unfortunately -##otemporal -hsa -bony -tolerant -signature -proteomic -##okinase -##dis -##gestive -advoc -133 -answer -##agonal -addresses -##cg -##iterp -realized -antin -##0000 -highlighting -missed -contaminants -##51 -characterizing -lg -perturbation -hypercholesterol -crisis -toxicities -##onymous -cll -mechanics -obstetric -metrics -camera -sterile -##ander -##zz -##oselective -##ifferentiated -##tivities -invest -trust -atrop -vent -medulla -appropriately -jo -municip -hydroxyp -pears -ssc -dielectric -##rotid -naloxone -abrog -protecting -##ouracil -danger -##iline -waiting -nadh -cryopres -brush -insol -curvature -anisotropy -##otyping -ca1 -hern -joh -##orate -oils -vd -##arach -treg -accidents -cd44 -ky -clustered -##admill -ecc -##operoxidase -npc -neighborhood -palate -bad -consumer -sorption -##uran -treadmill -dedic -neuropathic -ftir -##ynchron -indigenous -political -patent -interfaces -chemokines -lept -von -##nm -##urc -heterod -thalass -unw -##marks -stores -arterio -theta -##amn -fid -5p -discrepancy -methodologies -alters -reinforcement -mediastinal -enteric -ubiquitous -colloidal -imid -cigarettes -england -##imension -##inities -retinoic -excitability -155 -homogeneity -workplace -ketamine -repression -##orable -vacuum -labour -formulated -clostr -determines -swimming -responsibility -vte -ascorbic -##72 -##grp -seedlings -nu -hypothyroidism -transcriptome -dystrophy -desens -condensation -##ositis -##andibular -hypothesize -discrimin -ahr -biofilms -##hol -arrhythmia -unev -brazilian -catalysis -mismatch -phenylalanine -methylene -##opy -illumin -ica -##oxia -distinguishing -nested -problematic -transit -##relin -##ughter -accompanying -orthodontic -##empl -engage -##orex -##therap -cd11 -compositions -ecosystems -parasit -clinicopathological -mandatory -##traumatic -xenopus -emo -s100 -laminin -##nitine -gentamicin -pha -allergens -radion -pem -verap -##osexual -##umps -##omavirus -##ends -silicone -conformations -##ictor -tolu -121 -cong -##icious -microflu -gastroenter -agree -desm -caregiver -##functional -horn -impression -microtubules -##ieties -verapamil -mrs -isometric -tetrahydro -##ophenyl -stopped -prototype -intellectual -droplets -##iled -exercises -drying -offic -adc -##tigma -dependency -machinery -suggestions -nanotubes -dedicated -legis -hematological -capacities -##1b -proteasome -discussions -progressed -divergent -ciprofloxacin -pearson -testes -lewis -retain -##epsin -scientists -antenatal -calls -##adjuvant -conce -mail -amputation -leadership -metalloproteinase -##ophylline -combine -scarce -centrifugation -reform -##genesis -anticipated -##ez -homocysteine -nodule -##odex -127 -astroc -sustainable -administrative -orn -managers -contractility -elemental -postpr -encourage -trav -##oxetine -seminal -regulations -entropy -rhinitis -transported -trabecular -ks -illumination -sorting -reoper -transiently -radiography -fda -##100 -pools -histories -electroencephal -aetiology -thaw -potentiated -perip -transgene -suspensions -antipl -suppressive -kir -valves -##anus -void -cpp -epi -vf -mesoth -efficiencies -flanking -ej -avp -packed -tuber -lysozyme -opposed -antidepressants -mexico -##aches -nont -calibr -##nar -hypo -laparoscopy -abeta -leaving -bicarbonate -spiral -##onephritis -arousal -##hi -simplex -updated -##emed -condom -puncture -radiologic -##amol -##ranean -ischaemia -attained -htlv -translated -microns -##quinone -##acheal -##acetic -plain -presynaptic -grating -##ialdehyde -hir -##m2 -##vix -suited -nether -prey -thymic -##iella -thalamus -periv -closer -supine -coagul -prere -federal -constitutively -ovulation -##ania -##develop -coded -titration -collecting -glomerul -##ogeny -broth -veterinary -master -anomaly -##itamin -ova -necro -contextual -dyspnea -evap -##esthetic -plurip -orbit -##ills -neutropenia -islands -rights -132 -colonoscopy -sugars -abstin -collect -interfacial -reperto -ghrelin -##omnia -##ocrit -opioids -analytic -helicobacter -eds -cannabis -canonical -packing -nanocomp -220 -practitioner -nephrectomy -imprin -pyro -compromise -congestive -purity -adenylate -ward -##ymethyl -##uinal -p16 -sonography -138 -sweden -ileum -##clim -rfl -effluent -##iber -splice -conve -clo -utr -measurable -stigma -undetectable -##rolase -##osoma -discharg -triaz -##ionate -aes -netherlands -dropp -h⋯ -thirds -commitment -stoichi -pbmc -c18 -dominance -aligned -pose -rid -elution -worksh -cylind -##zes -radionucl -mip -parametric -propyl -modules -obviously -##quar -antisera -breed -dysregulation -pom -qs -esi -possessed -strip -metap -extrav -portions -neuroc -mediter -##bach -slice -tonic -webs -lamp -schemes -manganese -immunologic -trigem -acutely -torque -enos -cobalt -approximate -146 -atmosphere -##bd -osteotomy -typhim -serotypes -satellite -cholecystectomy -##diagnosed -artifacts -soph -sweet -pups -leukemic -arbitr -##acs -suspended -wake -##itre -vasculitis -inexp -els -laryng -typhimurium -neoadjuvant -##irectional -expertise -moisture -amphetamine -protects -periphery -grafted -rdna -inheritance -huv -intelligence -anionic -oxo -sevent -mutual -overexpressing -hoc -##olation -undes -expectancy -##ucleotide -smart -habitats -##itum -enroll -assuming -accr -##120 -heavily -cel -shortened -nigr -detergent -sensation -emotions -##cales -mucus -cyclosporine -febrile -osteogenic -encephalitis -gov -sedimentation -hematologic -ultimate -##tillation -adam -##itish -hemodynamics -hit -hydrocarbons -confounders -approval -originated -osteoclast -cml -flash -phthal -cornea -exud -genotyped -contraind -covalently -symmetric -##mi -virolog -##54 -##oderma -##aric -assignment -triggering -immunoblotting -ensemb -moieties -orf -##inator -oncogene -cytomegal -##del -##ris -conception -medicaid -paralysis -parox -spirit -branching -organelles -orthopa -##enn -collapse -cholest -subop -serving -##ker -reticul -cil -sad -mineralization -phosphatidylinositol -biased -volumetric -manufactur -dbp -##infection -alternate -initiative -cultivars -crest -solving -preserving -pelvis -meningi -tensile -vinyl -accessibility -##ostr -##opril -transloc -indoor -skeleton -pacemaker -##bound -ws -globally -##olys -interne -figures -##iff -1984 -inhab -charges -1970 -whites -##ulture -zo -diseased -##ught -kingdom -rnase -144 -##oreceptors -internalization -raises -atropine -valv -##imensional -##urate -##roch -##ocryst -##rops -##52 -anaemia -microrna -##tide -consciousness -pacific -scf -spores -##omorphic -manufacturing -frog -cpr -##opause -thrombotic -postprandial -bbb -calp -acetonitrile -stenting -##cephalus -latest -nodular -coherent -tetracycline -hospitalizations -18f -chl -households -afp -affinities -deaf -cities -##iomy -vitreous -##ortal -oest -transmit -informative -tocopherol -chlorophyll -donation -likewise -infancy -potato -submuc -##oste -##oted -exclusive -taxonomic -seventeen -##optic -daytime -pcp -dal -supernatants -nucleoside -raising -capillaries -resten -explaining -irrigation -surgeries -##nps -vwf -agencies -increment -tang -conjugation -##neal -##olism -##azolium -##ilum -##ama -hemolytic -##uling -##uns -coat -transducer -osteoblasts -autoradi -vitr -secreting -##yles -triphosphate -##omotion -branched -##nals -##oplasm -comprise -begin -copolymer -##entanyl -infiltrating -vox -##oplasts -proteomics -sparse -diabetics -hinder -foam -cage -lutein -infras -sacrificed -##orporeal -cervix -acquire -tidal -distortion -trapping -vena -oligonucleotides -rotavirus -thinking -recommendation -radiology -nose -inexpensive -##atinib -package -mandible -measles -##imil -deeper -##birth -pir -##ochlor -progeny -triplet -career -vr -note -##eles -##hips -cryp -silent -##olith -contour -quercetin -##±1 -carbohydrates -##amid -pcb -dith -motone -specialty -##ophores -journals -hrv -##produc -##atelet -overd -##idi -##fulness -b6 -sciatic -exacerbation -insoluble -unm -outline -plantar -subcutaneously -lactobacillus -##ubs -pcl -postmortem -nev -mep -undersc -farms -##adium -disposition -monomers -pharmacists -##terenol -impacted -combines -##itoneally -cd40 -mediterranean -pix -neuroph -repeatedly -sheets -nlr -##ammon -cytomegalovirus -outlined -depolar -unin -crown -arteriovenous -##atil -hgf -##bles -rhythms -innervation -neurotrophic -eq -endocarditis -unsaturated -maldi -anorex -monocyt -phosphodies -humidity -##peritoneal -pand -##g1 -telomerase -##assays -##olk -cyp3a -glucocorticoids -proteinase -##organization -##athers -ovaries -##plicity -unfavorable -melting -bpd -came -seropositive -liv -##r2 -anp -benzo -##room -##acetate -##aceous -mhz -elegans -a549 -##requency -inverted -sarcoidosis -exclude -cd45 -ibs -ou -wb -##nic -##ogroup -prostheses -148 -dealing -cing -independence -osteosarcoma -primates -##hythmic -initiatives -##ectomies -periton -collective -amines -neurones -replicated -##ithromycin -undergoes -##uspid -overestim -instance -carboxylic -pak -flavonoids -audi -disturbed -pharmacodynamic -oblig -3a -snr -##imeter -distinction -holl -fibrils -khz -pneumococcal -catabol -searches -trigeminal -suboptimal -cathepsin -este -dopa -broadly -alloy -monophosphate -nifed -insomnia -texture -firm -nh2 -viii -penetrating -##enses -enk -mammography -cotton -gamb -kle -dutch -synchronous -litter -proteolysis -autonom -##l1 -parenchymal -competent -multin -salic -seques -3t3 -uncontrolled -determinations -kcl -##enders -steel -individualized -##inis -##oprost -stabilizing -endometrium -015 -exploited -##ogether -caring -fentanyl -plaus -nifedipine -tomographic -##oxication -##calc -timp -nsa -haemorrhage -##uminal -freshwater -##dynamic -altogether -gar -british -physiologically -alternatively -##64 -neutron -automatically -isoproterenol -malond -mps -accelerate -uniformly -##inoid -##roline -##orations -stretching -sensitiz -keto -implicit -trh -wing -##onder -keeping -compares -www -##izers -tow -tar -helped -discriminant -ry -arises -##85 -##erine -##quartile -controversy -phosphatidylcholine -discontinued -aba -cpt -intoxication -digit -cet -bifurc -baby -eat -coastal -ducts -audit -subarach -fk -##aul -notable -undifferentiated -##omide -transpos -nih -interquartile -explains -##trial -elusive -marital -bnp -disadvantages -troponin -parotid -somatosensory -bfgf -bod -ton -charts -mannose -suppresses -##etite -##riting -primed -attenuate -radiologists -protr -##pray -ecs -hept -touch -hydrogels -##inally -versatile -dispersal -superfamily -housing -intraperitoneally -leishmania -1d -multip -139 -bactericidal -leukaemia -##tebr -welf -flor -##ocele -tym -associate -robustness -redund -preserve -crops -cultivated -indole -posttraumatic -sew -peritonitis -bg -##amate -sport -##omatosis -extends -ventilatory -whit -virulent -characters -corrob -searching -fibro -linker -restenosis -##vern -fisher -discrepancies -esteem -midline -residential -bioinformatics -groundwater -supplementary -welfare -sit -globulin -qtl -##absorption -tibia -parenting -hemangi -declines -malondialdehyde -continuum -nationwide -flies -tablet -##71 -positioned -##ropyl -quarter -rheumatic -##axial -adjunct -ptx -reactivation -prolongation -lumin -freely -ppv -ocean -proposes -emulsion -cutting -nanocryst -##py -##urf -resili -implying -134 -vegetation -replicate -hyperthermia -iodide -hamsters -spanning -uninfected -flows -guanine -pyridine -inpatients -instrumentation -##uliar -##tt -##lated -oestr -jump -tips -altitude -topological -peculiar -prerequis -cag -toll -##ket -incen -nhl -kj -moral -menopausal -cgrp -esrd -dysphagia -amd -ataxia -oesophageal -##iliated -myofib -choroidal -alf -##ypsin -##actone -psi -posture -fluorouracil -impuls -erosion -durations -remn -##itch -infective -notic -##rect -permitted -offering -classify -psychiatry -##ticals -offset -stap -displaying -reality -methylated -##ondii -swedish -friends -methicillin -tremor -tracts -##oto -##ographs -references -plc -psychopathology -earliest -99m -delivering -##otri -documents -deleted -##cope -dichro -narc -##cycl -##best -231 -elevations -##icl -religious -microal -bap -##ishes -carc -pouch -cellul -subarachnoid -##oalveolar -emptying -diaphragm -disin -dots -##dc -012 -gaussian -prescriptions -s2 -##apatite -osse -##oter -poul -relapsed -histocompatibility -mib -occasions -atrium -dbs -cytoskeletal -diploid -inspiratory -buccal -##oglycans -1982 -inguinal -eyel -fg -continuity -##itivities -##aicin -##cl2 -precondition -destro -monocytogenes -##atrol -homogenates -leiomy -beef -string -readmission -prostatectomy -neurotoxicity -displaced -maca -odont -escal -photosens -naphth -osteoblast -adduct -##amins -afm -bulb -sentin -acceptability -lifes -ampk -grand -99mt -##osm -calculating -strictly -nrf2 -parv -tilt -pax -telomere -cavern -electromagnetic -multifactor -update -iris -##acute -carnitine -##using -ocd -cava -dehydration -anomalous -recoveries -##inous -smears -denti -discharges -epr -obstetr -orthopaedic -uncon -##ications -##iop -##ofrequency -uncomplicated -bpa -everyday -##ned -pores -morphogenesis -superc -##unctional -gastrin -cd25 -##veratrol -glioblastoma -reconstituted -pyrene -leave -opportunistic -adopt -repetition -menopause -thromboembolism -cirrhotic -rflp -##idia -grap -microcirc -carcinogenic -pun -136 -168 -##night -microin -threonine -##opoiesis -##orrhea -capital -stably -nhs -##itan -##istine -incorrect -endocytosis -bioc -checklist -directional -music -##esus -##therapy -##electron -tele -puberty -##xine -##imentin -pter -myo -cytological -##elastic -salinity -tal -paste -##eterm -##ford -##aginal -capsaicin -enterobacter -civ -cdk -rifamp -horse -pathogenicity -tears -preinc -##son -zyg -nid -##97 -alternating -##ulse -turkey -cave -kc -intermolecular -monomeric -proteome -rnai -apex -client -spacer -scl -duodenum -microfluidic -vimentin -hox -##ipar -##anyl -radiofrequency -zym -legs -cds -feelings -rectum -scattered -nsaids -electrospray -attracted -##icted -symptomatology -opac -squares -tert -refined -dressing -substitute -extracorporeal -metac -##hedral -operate -dipole -##romic -solve -ureteral -neighboring -norms -hdac -accumulating -anaesthetic -##oreact -1983 -recycling -maximize -papillomavirus -cd14 -5h -hen -##trials -unexpectedly -beats -retest -pesticide -seq -diarrhoea -clarified -resveratrol -##orphin -enrollment -outgrowth -knees -##forward -ny -##idden -##anch -concentrate -orange -wi -catecholamine -tagged -inferred -rim -adversely -##aded -fec -spectrometer -##izz -lod -##aryl -nanostructures -supers -acclim -teg -##acterium -descriptions -neurophys -##uistic -presch -justif -thigh -##uates -agarose -triang -hus -explanations -stated -ors -##rophin -repertoire -aven -erythroid -choose -recognizing -##udin -resis -aggrav -##othorax -mechanically -playing -gases -##opharm -congr -vagal -##zer -3rd -mms -sert -scot -stereot -##iring -##holders -tent -2nd -##bf -lactose -chitin -##estib -chromium -concurrently -neuropeptide -fundus -antiplatelet -epa -hamper -immunogenicity -##enone -outd -cpb -decarboxyl -intral -subclass -adaptations -##cor -clinicaltrials -signatures -iib -##azolam -desire -##orylation -quantifying -##leting -delir -niche -platforms -medically -oncogenic -ritu -##fen -viewing -##oxon -ileal -ninet -##erents -suitability -noticed -153 -belief -##dv -spastic -harmonic -faced -insignificant -probabilities -broil -nineteen -aps -##omegaly -##ei -maturity -##uber -permeation -gsk -scanner -twofold -175 -arranged -sickle -##imid -##ju -sorb -mirror -tubule -overs -alginate -nicotinic -gbm -neurodevelop -erbb -nv -hyperth -engr -prime -biotechn -stern -interl -linearity -##ectile -##isal -vegetable -hrt -##entic -##61 -molars -##atable -beams -lister -absorp -ears -##cnac -purch -sla -900 -clade -globin -fairly -convergence -sophistic -disclosed -longev -##oxib -freshly -99mtc -penile -aesthetic -rhesus -mari -nosoc -supplied -##esi -##orum -rituximab -emphasizes -clostridium -unexplained -cofactor -terres -zeta -defibrill -registr -conference -143 -friendly -starvation -carcass -bundles -gpi -consti -aaa -visually -mur -perturbations -belongs -##ago -##know -anaphyl -pict -##md -rhabd -##hp -passed -minimizing -parap -annually -silico -transfusions -sentinel -abr -trifluor -emergent -mcs -aiming -dmso -ultrason -##iax -##ham -latex -##uish -lich -calcitonin -abstinence -coal -locomotion -tetram -han -pcv -cavities -actu -##athyroidism -188 -dentate -sensitivities -dissip -mimicking -spear -blunt -##gin -##eas -##yll -preschool -dendrites -##vef -hrp -semin -xrd -endo -ltp -pericardial -lox -orthopedic -specificities -episodic -gcs -eph -##ault -lacks -uniqu -longevity -bridging -dfs -grains -frames -probing -clonidine -consolidation -polysaccharides -##ocarb -deuter -##epr -chlamydia -142 -##oluminescence -seaw -regimes -##erver -r1 -guar -lambs -figure -choles -chimer -l2 -transitional -burns -##cranial -alpha1 -nosocomial -vii -##ocarcinoma -stabilize -galactosidase -edi -##itt -pull -screws -easier -##orial -##timulation -##acylglycerol -dropped -untrans -makers -gondii -##bu -nanofib -dichroism -##uronic -intolerance -nematode -##iatr -##cil -senior -diazepam -ideas -ensuring -slc -decid -primitive -##icked -constrained -ups -prolapse -neutralization -resilience -provinc -fluxes -unless -addi -stakeholders -##entified -orthog -haloper -emerge -germline -theophylline -vldl -vasoconstriction -recre -org -crystallization -paternal -predisposition -luteal -##rofen -isra -yolk -primate -##ogaster -anneal -quartile -hsct -##agglutinin -##atoxin -##house -optimizing -ophthalmic -bradykinin -seasons -literacy -forebrain -dispens -pam -exceed -##ulous -absorbance -lists -##ervated -coff -adenocarcinomas -multifactorial -coronal -disrupt -programming -##trate -016 -inference -hydrocarbon -wc -##tebral -facilitation -peers -shoot -extrinsic -stressful -microbes -holds -prevail -genomics -165 -carbonate -navigation -parity -morphometric -##rp -entering -haloperidol -##roportion -feces -##erating -lacked -haemodynamic -equine -residency -loos -##isson -##formed -deprived -prednisolone -goats -##terone -gangli -filtering -game -gliomas -##otocin -defer -##duction -partitioning -psycho -##atally -schizophrenic -emphys -esr -adiposity -shortly -141 -013 -##etts -##ocic -##iley -evaluates -feet -bacteremia -photosynthesis -##lp -diurnal -collision -hsc -hypothetic -immunomodulatory -##idth -sphere -glutamic -occult -rop -pedicle -dap -barr -chemoradi -prednisone -ulnar -acknow -unequ -##abain -##ipa -##pine -glycosylated -obsc -spiritual -dying -gdm -gabaergic -predisposing -regards -subscales -myx -meals -delirium -arf -gaz -autonomy -##acetyl -occupied -suv -gathered -xenograft -gln -splitting -jug -convert -##ospital -bph -acetylation -##uent -lies -##ck -chloroplast -##ocol -##02 -cock -isoelectric -substituents -##con -iia -cingulate -facile -boost -psychotherapy -iat -##olipids -ouabain -##yrib -handed -##aliana -ciliary -bilaterally -##tains -nanoscale -oligosaccharides -punc -fossa -thaliana -subcortical -##observer -convenience -olds -eug -strips -scn -aneu -therapists -ascer -catch -hips -014 -eosinophilic -facing -neurof -sitting -pfc -##width -anorexia -lobes -firstly -considers -bisphosph -thalamic -##cholinesterase -##ken -poultry -##uccess -##riage -vitamins -laws -adjustments -beta2 -repro -pauc -linole -biocompatibility -prompted -files -sounds -ecology -##iaceae -##oxal -carries -mbp -excreted -mining -sister -rapamycin -360 -320 -varieties -vesicular -trimethyl -##ersed -##ipramine -149 -undergraduate -##v1 -n1 -glucan -retur -biochemistry -##agl -wavelengths -##quilibrium -consequent -##ropr -##uccessful -harvesting -nonr -intercal -lot -gastrectomy -honey -letter -##usted -158 -011 -shifting -foxp3 -notice -##wt -wine -hfd -solved -phone -versions -dynamical -taurine -syph -disproportion -corpor -neurogenesis -seb -##×10 -swallowing -peaked -win -presently -unsuccessful -##ifies -olive -piglets -trophic -thrombolysis -vulgar -ug -1981 -divertic -##efined -unpr -##usions -018 -##ognitive -##atized -##si -ribonucle -##amides -rom -worked -appearing -##amphetamine -interdisciplinary -diagr -pten -writing -benchmark -savings -degrading -b3 -solute -dosages -psychology -nasopharyngeal -mutagenic -supram -equil -oligomers -ncs -constipation -##vitamin -##lycaemia -##1r -##odermal -resembling -fim -##oproliferative -aneuploid -places -th17 -230 -brachy -##param -turned -concomitantly -algae -vulgaris -saving -predominance -homozyg -dere -gonadal -##gran -exempl -##obia -karyotype -intrahepatic -normative -encapsulation -##illing -moved -##icide -293 -granulosa -##157 -chemotactic -comment -mycoplas -switched -doping -##hydrop -stric -cruz -pitch -doing -1r -biases -crs -hematocrit -ingredients -constituent -##eld -metam -imposs -thousand -subpopulation -orch -##anous -##otoxins -topology -##cens -mexican -comprehension -chym -##istinguish -##imal -ihc -stems -crossing -##zo -bioassay -commission -2c -colocal -##inol -##ronate -poisson -belonged -1c -pineal -aided -##iling -streptoz -standardization -fi -octa -##ki -histi -##istering -advancement -##ried -##e1 -gastritis -oedema -##udine -mpt -subl -plasmon -explants -eus -##icate -transd -tinn -immunocytochemistry -jaund -crack -gate -speaking -erectile -transcranial -readers -ancient -##wich -sulfide -voltamm -incon -preced -ink -interrup -precl -acetone -nociceptive -appendic -##iana -encounter -##imotor -alcohols -##ano -alert -kt -decarboxylase -hydrocephalus -calorimetry -##exual -ticks -spearman -melanogaster -patterning -recognised -hollow -glandular -##ara -couple -ch3 -combat -father -##plicating -147 -toluene -carotene -280 -b12 -interruption -supplemental -lasted -anastomotic -extensor -assume -chicks -grip -jur -imposed -liquids -messages -terrestrial -pm2 -tinnitus -##okes -statistic -aldehyde -antagonism -quies -latencies -reoperation -inward -##eratin -grey -vh -detoxification -clav -drawing -sq -##unted -hepatectomy -foster -possessing -anap -message -##istinguishable -statement -##aminidase -annexin -sepha -ingested -##nac -incidences -bronchi -##tii -##atism -vmax -ruptured -polyuns -ctx -inbred -##87 -##ario -cca -warming -reconstructions -##ocytoma -autonomous -retroperitoneal -phagocytic -μmol -catalyzes -manipulated -subspec -estrogens -sepharose -ali -erythromycin -##ng -smallest -wit -mitosis -topography -017 -bevac -sophisticated -afferents -lactamase -eic -homa -forsk -gravity -uncou -gins -h4 -prokary -solids -restraint -tried -bevacizumab -##kes -tms -##itoneum -esc -holding -##ulo -choosing -poses -ebp -assimil -arabin -invertebr -##onergic -gbs -terminated -gemc -152 -radiolabeled -##aca -crash -hyperal -grid -comparatively -bronchoalveolar -msm -helping -nephrot -histochemical -##enicol -154 -setup -rash -gamm -##oresist -##abr -pandemic -mev -cred -secretions -instructions -beet -mpo -steatosis -tsp -enkephal -##opid -arbitrary -cps -eae -##imetic -attendance -lad -panic -nucleation -japonic -atcc -erythropoietin -uroth -indistinguishable -noneth -looking -thiaz -intent -matern -epithelia -##hem -ptc -##entral -intras -transportation -ecd -gating -capsules -##2o3 -##rov -##po -nonetheless -##globulin -ampicillin -coffee -favored -debrid -teen -transparent -##96 -qrs -aggregated -fractionated -distraction -basolateral -lvef -intracerebral -hypop -plausible -##udi -catecholamines -hetero -immunocomprom -polyunsaturated -gemcitabine -planes -autoimmunity -multidimensional -cyclodex -valence -localize -cores -interested -desensitization -##ainees -diluted -hydroxyapatite -010 -infrastructure -asbest -sensorimotor -contig -speciation -destabil -##i2 -debr -paw -quit -chief -das -driver -lever -sandwich -strands -lignin -thymocytes -afforded -tacrolimus -neuropsychiatric -multimodal -swiss -##aris -mimics -##com -##vertebral -##icile -fibrotic -deliveries -destructive -standardised -prothrombin -lymphoblastic -##ogrel -##acers -dyslipidemia -narrative -##ropol -##gus -##force -cited -##ammonium -dvt -projected -##ophthal -adr -##aec -##arate -shrink -##tisone -px -##othiaz -flowering -##endicular -hyperinsulin -caudate -cotrans -cbt -ltd -bmt -multifunctional -forskolin -utero -eif -beat -pss -chew -serotonergic -equipped -chemotaxis -antinocic -cytometric -neurosci -biosensor -antagonistic -##ublic -##olinium -##imab -##osphere -inducer -barley -cholera -steep -vigorous -mesop -klebs -neurogenic -advancing -bari -myof -##itinib -constituted -folic -##here -##oge -overnight -##opent -arthroscopic -cocc -atri -inspection -##ovol -schedules -aed -hypothetical -cleaning -synchronization -fails -capsular -lex -uveitis -cope -ejac -fathers -##94 -coatings -neglect -perpendicular -personalized -intrathecal -telem -##acted -hydrolys -polych -156 -alkaloids -flo -nonm -vegetative -lifespan -hydrodynamic -embryogenesis -modulator -##enedi -amyloidosis -##itates -prerequisite -interconn -monos -silk -##tention -vasodilation -prion -##elioma -h7 -##astin -wbc -erp -nigra -collections -dentists -buffered -multifocal -192 -etiological -modulators -##imburs -devoid -upr -ewes -##acia -bupivacaine -eukaryotes -roughness -otitis -##elity -##iximab -haart -pcbs -##edicine -aun -costly -##emoral -covers -##ocyanate -eliminating -thyroxine -operon -##oxazole -hepatoma -v1 -shedding -24h -electrocardiogram -book -neighbour -republic -friction -wol -##oliosis -advantageous -occupation -##enium -vicinity -microglial -270 -tu -digestib -##oventricular -blacks -hydrophob -pyrrol -##5a -##olor -predictable -##odom -echin -##oves -repeatability -oat -##estin -kpa -##ij -reimburs -##ecu -recon -semiconductor -##vert -flora -intrig -ferric -coma -##oxins -discs -tunel -celiac -##inescent -maneu -impossible -##inone -streptozotocin -##ulinum -tunn -secrete -going -pictures -analyte -##angu -##ardiac -cytokeratin -twist -mutational -dissociated -worker -sic -folds -trapped -apob -surrounded -##ael -alleviate -phosphodiesterase -inspired -straw -##adine -layered -bz -meiotic -c3h -nh4 -neovascularization -gfap -wavegu -campyl -ets -resections -pcna -till -damp -visualize -licens -##agas -repaired -immersion -##uzumab -##oplasma -pointed -t2d -##onomically -##entful -##apeptide -counselling -advis -reconstructive -restrictions -dwelling -originate -##eresis -mist -##aenoic -##fraction -syphilis -allografts -syncy -sts -imt -tuned -chambers -uneventful -store -herbiv -hens -evolve -##etent -palp -homo -klebsiella -adjunctive -immunocompromised -difficile -sativ -##algia -##olia -edges -nationally -coum -tricuspid -modular -sequentially -redistribution -ovid -epstein -micropartic -##trypt -##osteric -neb -sigmoid -unidentified -labile -protons -receptive -price -##azepine -msec -huge -env -drg -workload -antiproliferative -augment -reconstitution -pufa -instant -##1a1 -pertin -##eit -tensor -##class -pharyngeal -cec -trainees -stic -replacing -surfactants -##mer -cts -formaldehyde -restorations -##phenol -unpreced -nanor -oblique -isotherm -##idis -##drug -mosquitoes -exacerbations -clopid -scid -contrad -voxel -ehr -2r -ua -ec50 -hk -cnt -saw -stz -apoa -manus -nep -harvest -guiding -filamentous -unprecedented -outper -ascertained -purkin -udp -elucidation -sixth -aper -##ineral -amphoter -clopidogrel -supervision -##opolym -frameworks -synthes -activators -analyzer -intraepithelial -##bt -purkinje -qds -microbiome -biophysical -elsew -pim -chrys -gle -campylobacter -compressive -elsewhere -##wa -##ethanolamine -purine -ending -companies -productive -interneurons -effectors -integrative -pbmcs -preex -immunohistochemically -##ira -##ilton -##emide -frailty -encouraged -anticonv -osmol -warning -dissected -straightforward -peroxisome -##oct -##astically -filtered -glomerulonephritis -##adone -thyrot -elastase -##nem -cxcr4 -##oscope -isoflurane -underestimated -hunting -##rogl -cruzi -ribosome -occlusal -##iabetic -coenzyme -feeling -succinate -torsion -subacute -legislation -##oneg -strategic -uvb -##itants -atlantic -spong -ventilated -mug -infrequent -leukotri -##issive -infan -mesang -eta -vehicles -a3 -##oprote -##level -hydroxylation -orthogonal -jaundice -thy -biodiversity -151 -listed -bariatric -bos -##ycle -mif -##ylyl -gib -conjunctival -wheel -ulceration -depress -260 -##agues -nonsm -shut -trachea -pharmacology -oligomer -pole -##had -##91 -##iva -luminescence -ensemble -conting -mao -moments -imaged -resemble -danish -hbe -adhesions -spirom -hydroxide -rick -resum -deph -opinions -collagenase -plga -dss -intriguing -##562 -##amphenicol -##rical -flexor -neurodegeneration -##graphic -synt -##loxacin -manuscrip -pneumon -hamilton -trimeth -gata -##etaxel -confusion -emphysema -ao -##tizing -microvess -##pg -##tilled -confirmatory -dlb -##azepam -##epile -ascorbate -erythema -trout -penicill -iol -##osecond -h1n1 -flood -##uresis -arrival -tunable -reorganization -##tl -docetaxel -porphyrin -wedge -norway -cyp2c -cohes -consecu -appetite -safer -iterative -pover -computation -occluded -##emp -abiotic -hyperpar -bioreact -asn -polyd -##opsin -##ason -serous -immunoglobulins -ye -##tructured -poverty -ontario -gum -##plo -ara -μl -opens -jugular -##bing -157 -tha -diode -ccs -genotoxic -immunogenic -brca1 -methadone -mant -scatter -fvi -ionizing -smc -ventilator -dx -mannitol -microarrays -##emias -ku -exchang -devast -rostral -pher -benzodiazepine -bread -##bg -compensate -falling -cuc -mcl -mm2 -##asth -emitting -willingness -ovx -qrt -consecutively -abrupt -metabolized -l3 -sickness -iiia -oxalate -unre -igg1 -unfolding -ellip -transformations -atrioventricular -ceramide -ended -ferment -ards -substantia -##his -reasoning -ribose -mgl -photocatalytic -exit -conferred -##ko -albeit -glycolysis -lbp -##thrombotic -##arith -scoliosis -750 -##itely -reaches -rbcs -pigmented -hypn -myrist -##yridine -fidelity -oroph -##obiotic -##atheter -tka -inotropic -tracing -mastectomy -##bens -utilizes -anhydr -lie -acted -coming -approached -ori -worth -slides -##juana -granulomatous -##atility -guarant -reinforced -harboring -reticular -married -reflectance -granulocytes -paralleled -apl -##rosy -vasoactive -pertinent -##b3 -eluted -corros -##uce -##faction -macroc -shig -retarded -motivated -macromolecules -philos -digested -1500 -tactile -##olymph -paroxys -export -energetic -##otherm -defence -##obutyric -crim -caesarean -portable -biogenesis -install -177 -unr -antagonized -editing -fes -phenyle -weather -richness -uniquely -##ellate -heroin -healed -##urition -metron -ancestral -sutures -##hc -##glucose -bear -1979 -isotopic -thromboembolic -spikes -hemolysis -neglected -cz -##aturation -verification -ascribed -infiltrate -phle -pill -##airs -coordinates -##omogene -pluripotent -hidden -bromo -colleagues -overlo -accumbens -mct -destroy -##df -acetabular -denaturation -migratory -agriculture -phenylephrine -revolution -workshop -restrictive -investment -denervation -glucopyran -acetylcholinesterase -##idus -midazolam -centrom -polymorphonuclear -pag -incidental -sln -appoint -begins -lytic -##ifers -abrogated -temporally -advent -gauge -chloramphenicol -constriction -authentic -chemilum -conflu -retinol -convergent -rct -spore -intract -hypercholesterolemia -inclusions -aav -cct -infest -implication -dol -ovariectomized -neurite -cta -sct -##ankton -begun -prostaglandins -damaging -##olated -helices -tends -##odeoxy -debridement -cyclohex -vibrio -elasticity -motions -pairing -epiph -tender -etop -blindness -acne -35s -arsen -topois -oxides -##estock -tann -debris -narrowing -##well -outlines -doubling -spiked -mitogenic -pherom -microstructure -thromboxane -livestock -appendicitis -p27 -##onad -deformities -logarith -##icine -relates -harmon -empty -##life -asbestos -supramolecular -##achol -coexistence -##oem -disaster -cd2 -enteral -pea -labels -notes -mmps -wky -hallmark -##odia -oscillatory -census -ordering -##rna -glycosamin -##oviruses -supra -##icially -lessons -irs -##ozapine -sonographic -suck -##aminophen -disparity -caco -ais -graz -biol -answered -analgesics -pie -##odeox -##ubated -174 -no2 -etoposide -breeds -detr -yag -anticonvuls -anisotropic -averaging -valvular -liposome -##att -enthal -anger -euthan -consultations -marijuana -tonsill -anthrac -cruciate -washout -lysosomes -toxicological -glcnac -infectivity -gaze -declining -auxin -necrotizing -wiley -amphotericin -syring -##align -paradig -business -stemi -discer -disciplines -159 -bradyc -coexp -nonf -nanowires -compulsive -intravitre -interven -lec -##thi -awake -contrasting -205 -technically -implantable -paradigms -halluc -virgin -oscillation -o157 -##tian -hev -eru -intranasal -##yne -chir -computing -binge -checked -fx -##april -explicitly -visu -radionuclide -##noea -chorionic -dies -drops -leprosy -hazardous -thalassemia -mics -bite -roughly -controll -##arring -hung -bit -orientations -hek -##inesia -serocon -transposition -##oselectivity -consor -sialic -k562 -epig -##olus -##ovsk -##pic -explos -##ulph -##apopt -antidi -lining -compatibility -##b4 -comes -enorm -019 -spss -remodelling -herpesvirus -scs -##aked -xenografts -multilevel -missense -cdnas -pc12 -untranslated -remainder -mitigate -operatively -slit -jet -aromatase -##opeptidase -abscesses -renewal -bomb -##yelination -metropol -fluctuation -catabolism -chondro -porosity -rods -##uoden -insem -spatiotemporal -mismat -b7 -thousands -##dehydes -##±2 -##igibility -intractable -methyltransferase -esterase -interpreting -##sulf -##through -influenzae -opiate -fe3 -templates -preconditioning -srs -plug -thp -repressor -neurodevelopmental -hns -ipv -mek -##uprofen -##odipine -thail -subscale -##tegration -ampa -fingers -spermatogenesis -opened -culturally -devastating -##arone -fulfilled -devised -governing -epist -drb1 -##ocks -drift -caloric -photographs -thailand -##76 -obliter -antipsychotics -whitney -paucity -gmp -##isters -##retin -kinematics -##uridine -sibling -invariant -##qi -craniofacial -##oxan -pct -catast -innovation -biodegradable -##orphine -ameliorated -rew -##nish -midbrain -##optera -hms -sewage -pra -stereotactic -##arby -occasional -carbachol -confers -clozapine -rendered -pms -amphiph -##minth -naphthal -injecting -##ivocal -unal -dermis -##iens -relapses -##light -182 -assemblies -basin -##yb -pathogenetic -row -ud -nephritis -##gesia -##heal -atresia -linguistic -ordinary -semiqu -no3 -acetaminophen -photoreceptor -divalent -hyperparathyroidism -alcoholism -reproduce -stratum -185 -linoleic -##aned -##othane -chloroform -reti -ureth -##3b -##ocyanin -genotypic -trisomy -think -adrenaline -dams -initio -##enib -endors -disclosure -compensated -rabies -##clusive -reu -ams -mesangial -shot -##orus -##oplasia -lu -##arrhythmic -##74 -pharmacotherapy -crosslink -cylindrical -elongated -boron -##erals -pressor -tregs -##tail -deployment -warrant -ibuprofen -creates -bare -owners -##osulf -serov -eyelid -electrically -##aventricular -alkali -transfers -packaging -strikingly -hydrolase -delineate -##net -scanned -annotation -nip -##rophosph -philosoph -masked -captopril -botulinum -##entions -##afluor -nach -pq -supposed -tcp -solubilized -bird -cephalospor -authorities -organisation -cyanide -distorted -quater -mount -fvc -memories -washed -fad -fistulas -thoroughly -nanomaterials -inflamed -resembles -goat -##phase -relapsing -assurance -ria -antiepile -brachytherapy -flower -societies -unaltered -maxill -cms -heads -eligibility -insert -ventricul -acknowled -discriminating -council -enantiomers -abstr -mj -invited -halothane -argued -##utation -preventable -polyst -superimp -bridges -robot -p65 -pda -reader -##2c -myopia -##orphic -225 -popl -dipl -capsid -##kal -sertoli -egfp -cytologic -##mu -crh -##istan -contraception -crossed -randomization -##phos -micronucle -##obium -##ilin -junctional -##lofen -162 -impairs -dimeric -vor -aggress -##93 -doll -bla -gaining -##sd -##atre -ultrastructure -xi -judgments -nitroph -factorial -ral -minus -enkephalin -##roma -##ronch -annealing -##eller -splenectomy -xl -178 -##onge -##recogn -transduced -checkpoint -priorities -empower -##olase -leishmaniasis -triton -positional -##oris -arrested -listeria -gadolinium -adipocyte -##kinase -crystallographic -##oned -neurole -eruption -prur -breaking -lethality -creb -##omeres -##peridone -shrinkage -##elic -mmc -microf -##plas -puer -anatom -lipophilic -hemostasis -4th -nonun -digoxin -##thermal -tether -##eastern -rhabdomy -siv -eluting -##osomiasis -dtpa -seawater -##bur -nigeria -mediation -##ogluc -fluc -introduces -immortal -##f4 -neurocognitive -acidification -hydrophobicity -streptomyces -bioactivity -##orth -inhomogene -sarcomas -paramagnetic -spacing -immunocytochemical -comprehens -drinkers -##oinos -pyrimidine -astrocyt -##ospasm -hampered -photonic -saph -you -atg -jejunum -photodynamic -statements -ventricles -occlusive -homologue -ornith -specification -chord -listen -residing -districts -simulating -32p -##trated -dialys -##iculus -him -lun -worms -subsp -ternary -denitr -cd13 -lati -tetanus -bandwidth -prodrug -organis -educators -situated -melanomas -genis -symmetrical -##onates -preparing -fna -pulsatile -bifurcation -ranked -##legia -objectively -cubic -oxygenase -refinement -##akary -cream -degrade -territory -##ingly -execution -syncope -zoon -hairpin -intima -g6 -##ispens -hyaluron -historically -1978 -diastere -adequacy -numerically -soleus -anxi -##mitters -##roscopically -vdr -##olars -paroxysmal -shares -proceeds -chloroquine -environmentally -oesophag -##ometrically -lying -etiologies -doubled -164 -bilayers -cpap -parp -drastically -reactiv -fluoxetine -##lasia -##ini -groove -allosteric -lentiv -l5 -nct0 -etiologic -dysm -permeable -##nc -polycystic -##iant -##ineural -heated -banding -abnorm -melanin -osteoclasts -ffa -trin -neurotransmission -cadaveric -holes -backgrounds -farmers -calibrated -strengthening -push -dilem -ems -lamellar -radiosens -##ami -impulse -166 -##idene -reproduced -whee -##yryl -195 -tio -monoph -##oplatin -reared -oestrogen -superiority -nondiabetic -coarse -immigrants -twitch -placing -evaporation -manipulations -##entanil -##ocutaneous -hydroper -streptococci -flowers -##ocyclic -##eprazole -burnout -indispens -evening -##uro -##aterial -##yrid -repressed -cmr -sett -bean -evac -additives -reviewing -outdoor -##methyl -reflexes -aminoglyc -quadru -mycoplasma -cloud -connecting -choroid -paracrine -##onation -canals -scap -pedig -answers -iatrogenic -##ocent -michael -substanti -shallow -engraft -probabil -autoreg -ferm -shaping -fis -##idinium -imrt -participates -hundreds -adri -sga -infertile -infantile -cycloh -spheres -concan -monoxide -5th -herd -endonuclease -doi -##lysis -amenable -valpro -amphib -0005 -glomeruli -264 -mimicked -symp -synapse -pest -##aturity -multist -##apoptotic -nmd -trough -cftr -##olab -##allic -reviewers -probabilistic -expectation -visited -uncertainties -heterozygosity -muller -##asin -anthropogenic -e3 -##phi -plei -##ropin -##kb -sp1 -completing -dsc -elementary -hydrolyzed -dephosph -counterpart -mortem -ppd -167 -##orphyrin -##atine -colch -judgment -transgen -tnfalpha -cron -po2 -##ofuran -1975 -pyrophosph -titres -androst -equivalents -metalloproteinases -dimeth -clotting -myofibrob -migrate -ideation -aza -##fection -##berry -nz -interrupted -pbl -cnv -spa -cic -proposal -confron -polystyrene -phosphoryl -##timulated -dinit -##omatis -incorporates -interests -csp -clinicopathologic -scene -steric -##lys -imperative -nanoc -failing -committed -exciting -lpa -oleic -mediastin -workforce -dwi -submucosal -##yron -##rals -axes -secure -hed -##alis -##onegative -concre -##atergic -seeded -gallst -##anat -##yseal -igg4 -phylogeny -mounted -obstacles -tendons -maintains -rigidity -simvastatin -##azosin -said -carbap -dividing -descriptors -##book -suppl -stacking -g3 -witness -geometries -##187 -##opreval -sponge -##86 -pleth -##ja -marginally -asparag -justified -diphosphate -synchronized -##anide -lond -ameliorate -eclam -syngene -smoked -##ishment -mts -trophoblast -##urethral -monoamine -conductive -seropreval -abnormally -mobil -dish -##ximide -methylp -ch2 -hook -metropolitan -cyclodextrin -ecmo -song -reprogramm -##oretin -worst -genistein -wards -kar -handle -hypoplasia -acros -##icides -marketing -granuloma -mycobacterial -##ima -163 -unmet -arose -herds -raf -reacting -motivational -##omatic -strengthen -##hu -##arenal -##adec -barb -restoring -occupancy -blastocyst -##avalin -quadric -patellar -anchored -tcm -##vian -glutamyl -lect -traction -inacc -climatic -##ocystis -##azo -##transferases -##lab -##inic -##ympath -subdivid -speeds -sterol -bleomycin -o3 -oligo -sevo -biomaterials -deals -i2 -##aulic -drawback -6j -pharmaceuticals -scavenger -megakary -thematic -bib -disag -chlorine -acinar -fove -sinusoidal -syngeneic -##orem -##p4 -gerd -neurotoxic -readings -##entistry -##ximal -swabs -phenotyp -172 -##x10 -h⋯o -eradic -169 -##acea -feat -premenopausal -cd16 -violent -disabled -abstracts -introns -topoisomerase -survivin -unrespons -acetyltransferase -chaperone -carboplatin -forests -minerals -correspondence -retroviral -dispersive -osteomyelitis -##oly -hhv -glycosyl -unpred -rhamn -empirically -subtraction -hong -nyst -bioch -stricture -adrenoceptors -accred -##esp -synergistically -larynx -disposal -thc -vacuoles -vasodilator -mildly -nest -quadrup -catalyze -addic -ei -glutamatergic -proteoglycan -intimal -demanding -##imilar -perfluor -myeloperoxidase -intravitreal -eclampsia -bead -5d -concanavalin -retrovirus -##ospecific -##oxylin -viet -deacet -trachomatis -specially -dangerous -imatinib -mth -acp -##arians -physics -icr -cyclosporin -##terolateral -##inus -lym -##burg -androgens -dithi -neurotransmitters -212 -resembled -monoc -ortholog -##sr -gpa -manually -infect -adds -dentistry -oryz -##uitr -perivascular -reuptake -lem -##tryptamine -covariance -xanthine -##otonic -##inia -ken -adver -##amers -butyrate -steroidal -nanocrystals -##onasal -photoelectron -rectus -corrosion -cardiology -rhythmic -methacrylate -##l2 -##ocene -epicardial -glycans -##ovalent -chemopre -##cys -limbic -decided -consul -peptic -##anthine -instillation -##oflav -nis -suspicious -##uctal -176 -brdu -elucidating -cavernous -constraint -civil -161 -nephrotic -wearing -brack -##thesis -intercourse -orthotopic -blo -fragile -niger -pgf2 -quiescent -msp -lpl -contradict -perceive -lasers -funded -lactating -appraisal -##athic -supervised -cryo -uti -pertaining -pcd -precipitated -dab -birthweight -evaluable -counted -##anthin -copolymers -pph -preincubation -scarring -referrals -registries -##agger -integrins -retraction -alpha2 -bedside -motile -breakthrough -##operiod -##ymic -ionophore -##kd -##ocalcin -chemoat -glia -polycyclic -yeasts -imagery -pean -dissatis -homeostatic -##uses -thoracotomy -bradycardia -xenobiotic -##imine -bivariate -antiepileptic -informal -london -pubertal -deemed -eia -saphen -barium -underp -##c3 -tachy -bun -##400 -analysing -amy -dict -indispensable -024 -longest -##nv -fertile -preh -snail -triti -pei -microcirculation -intracl -sevoflurane -##ocap -028 -2d3 -moss -reserved -extrusion -metronidazole -folded -bcs -engaging -ranking -colchicine -irritation -dereg -hardness -biodegradation -##iteal -preexisting -bon -urothelial -cancerous -guanosine -jejunal -heparan -albino -##cus -186 -##atars -##ava -##etron -adl -183 -##ifera -hsp90 -contributor -256 -morphogenetic -##orectal -chondroitin -sv40 -##odys -cddp -cytosine -##urred -##thra -arrangements -nematodes -##endoth -artifact -flour -asph -brass -practically -##lofenac -electrocardiographic -resonant -uf -appreciable -##ogue -amoxic -intentions -reimbursement -motoneurons -weighing -lacz -paran -portug -prison -denmark -##orrhiz -rotating -lichen -1960 -##ieu -##82 -germinal -rescued -mtb -intimate -microparticles -complaint -substituent -mont -vpa -wax -##odynam -serologic -atra -lysates -refers -doubt -k1 -intraventricular -##idate -##ip1 -cryopreservation -mucinous -gambling -enormous -graduate -antiarrhythmic -gastroc -autoradiography -##t2 -macrol -##otin -dysk -seroprevalence -##ithiasis -##ancing -suction -atrophic -propionate -021 -alex -sequestration -moll -vb -hyperalgesia -neurosurgical -subfamily -prism -unrecogn -interior -secondly -fitc -alarm -181 -023 -acanth -carn -pyel -gpcr -anchor -##otracheal -insertions -engraftment -examin -infiltrates -reconstruct -mucous -autocrine -lncrnas -burd -bovis -insult -triage -ppb -##tc -metaplasia -slowing -photoperiod -biop -pairwise -sof -222 -##iters -##ado -unsu -kain -rankl -tnm -203 -leach -morphologies -outward -superconduc -municipal -ugt -##μm -pis -try -##olded -pectin -pathologists -arterioles -fmd -carcinogen -##ugg -mutans -sleepiness -enfor -commonest -gluten -carpal -027 -hyperlipidemia -gpr -deer -antim -dhea -macromolecular -industries -unamb -tcdd -pixel -##articular -mole -dsa -interobserver -inoculum -fmol -tlr2 -oestradiol -##89 -xr -##ytoin -p2x -inconclusive -##apsed -persists -##romatic -overlooked -4d -##abdominal -traps -mm3 -formulas -204 -centrally -depths -##idation -pla2 -israel -##oplankton -cortices -lor -luteinizing -expense -##itidis -flavonoid -complexation -unpredict -##thoracic -lipoxygenase -##brand -bursts -tmj -##igo -exosomes -bam -##arine -chori -aldehydes -exactly -dystonia -h5 -thyroidectomy -printed -warrants -azo -tad -##opeptide -subdivided -##olis -headaches -speculate -vietnam -202 -burk -##gut -mcc -ctc -##uated -pneumothorax -helminth -hydraulic -foetal -transcatheter -billion -lectins -biocomp -attenuates -##ympathetic -zirc -diuretic -trichlor -dba -unambiguous -administering -retaining -##125 -translate -ancest -transmitter -singleton -tic -airborne -glycan -suggestion -##mas -elite -methamphetamine -minimized -##84 -gleason -##icated -principally -vn -remnant -odd -fasted -epoxy -unsatis -##ille -##ellosis -##word -strokes -thro -needles -hyperthyroidism -ivig -rubber -ovalbumin -ator -foraging -qualified -noncoding -lose -fun -##enh -oscc -##omn -sinuses -thrombolytic -synergy -encounters -codons -##tons -imprinted -##adel -serogroup -##brt -##ticism -succe -pz -glucosidase -ultrafil -attract -unt -dysfunctional -rumen -##rosine -urtic -osteoporotic -inequalities -##rein -171 -hyperv -skew -gpx -interacted -regressions -cbct -enterococcus -173 -passing -transection -nowad -nonster -halogen -etching -diversification -criminal -mite -##asant -indol -visco -nowadays -##epines -blunted -##uitry -migrating -unem -gastroesophageal -escc -liposomal -##ifiers -teacher -vertebrae -##mr -##acillin -bold -cannabinoid -advertis -##icit -accidental -##entin -morbidities -amplic -incidents -exertion -educated -##enzymes -##field -efferent -denture -##prof -dre -reasonably -leaflet -conflicts -##acies -##gamma -proceed -quadriceps -pathologically -##eve -malays -thermo -##oxamine -osteocalcin -rotator -competency -##avirin -lax -##iensis -##atech -atlas -disequilibrium -paths -exceptional -##eximide -quart -colorimetric -acrylic -##ecoxib -exocytosis -ptp -##ofil -neurophysiological -bever -closing -volunteer -salient -psycin -steadily -psycinfo -ossification -plastid -##uman -phenanth -arguments -mcr -foramen -company -rfa -yog -1976 -##regn -neonate -psoriatic -##robenz -##utions -pta -cycloheximide -abor -##ostat -concordant -pointing -unified -ovine -isopropyl -##elected -gdp -##icc -cd20 -norw -hatching -threefold -cres -myogenic -##olones -cancell -predator -greenhouse -##like -206 -chewing -wells -quaternary -tank -inqu -##ilance -1977 -algal -##etra -ecz -finland -##opamine -socially -hardware -dar -hepatotoxicity -payment -osseous -ready -incompletely -membranous -fishes -gun -hscs -fort -immunocompetent -pigmentation -adriamycin -leader -tremend -##ethylation -cascades -polyphenols -msh -ridge -##oprotection -atorvastatin -encompassing -diclofenac -neurofib -panc -questioned -blank -022 -emphasizing -##osurgery -tags -waveform -##92 -exacerbated -026 -##ydig -sephad -diaphragmatic -##onomical -staged -oxidizing -enucle -##bia -classifications -faecalis -l4 -sephadex -graphical -##etary -duty -biphenyl -exhaustion -fluoroquin -##aki -##itories -intraclass -pellets -##argin -##omycosis -mans -##cing -amoxicillin -gynecologic -uncovered -##oca -opn -bark -fiss -##omandibular -uremic -nuclease -nearby -reductive -exploit -sas -wilc -ribavirin -##ticle -snap -combinator -svr -##gc -staphylococcal -heritability -primord -representatives -aminobutyric -dnase -##alg -stature -compartmental -cbd -gill -eosinophilia -chemoattract -invasiveness -turk -fibros -##oencephal -publicly -##vd -mong -mra -##holder -apple -trypanosoma -##olysin -leaching -saudi -overproduc -##electronic -cronbach -clues -calcified -therapist -got -bout -kall -scars -##ql -condyl -fusarium -attainment -cassette -quinone -dext -184 -evidences -aliph -leydig -saphenous -vice -msi -absorpti -187 -multicent -decompens -pepsin -kong -condensed -mesoporous -##odopa -kits -markov -reversibly -##afenib -leptosp -##alline -mush -agglutination -isopren -##othing -phenobarbital -homeless -nts -##fly -bubble -interpretations -##oinositi -##arity -observing -lymphadenopathy -##awa -##etization -laun -nag -societal -separating -detectors -##astig -mountain -braf -libitum -cd28 -aeg -risky -constantly -letters -pumps -##plicates -recap -interrel -transaminase -##opathological -##antit -assemble -kp -reabsorption -affiliated -mycobacteria -histomorph -operators -##axin -zoster -cfs -kel -##epid -cpa -kat -##acrimal -perovsk -classifier -proliferator -probnp -cmc -culturing -economically -sarcoplas -arres -counties -disinfection -serr -expenditures -pcc -5000 -infliximab -##oco -lobectomy -binocular -##formation -hcmv -beverages -##ulline -associates -##otrypsin -haematological -deoxyrib -purely -cheese -ban -elastin -##ogenin -backward -egcg -psychiatr -hsd -thermally -disrupting -##alo -dural -nervosa -fermented -bacilli -calpain -anesthetics -sers -##occl -etoh -208 -eccentric -ornithine -hyperg -psychomotor -ribosomes -sacral -035 -latit -dysfunctions -sectors -##oped -pericard -entail -edition -tetrah -##iparous -constitution -anticoagulants -216 -##oresp -##itives -hindlim -ruled -discussing -dorm -holistic -gastrocnem -aliphatic -emitted -recreational -compute -antih -wille -pls -##aneurys -endocrin -##ofemoral -manuscript -##ellae -sharply -chemiluminescence -weal -##idomide -voiding -ultrasonographic -obsessive -##appab -##ronchial -##eptin -hmgb1 -lakes -shaft -ketone -ambiguous -tas -genesis -##fin -clay -wel -lesioned -##mn -vincr -consortium -simplicity -coral -absorptiometry -bacteriophage -##osterol -mesothelioma -damages -voltammetry -gout -##yelin -exagger -p300 -3000 -seriously -ankyl -gynaec -chop -followup -carbamazepine -urgently -certified -##imethyl -lingual -carboxylate -masking -thior -saharan -330 -recognizes -kap -##anda -contaminant -##agens -##rows -dnas -outlet -##oreflex -impulsivity -withdrawn -b16 -colloid -inev -producers -##ubstituted -tca -spo -##anum -worsened -##yline -bloodstream -##tigo -##adap -thoughts -immunophen -punct -annular -##aggreg -contracep -inhabitants -rifampicin -provinces -tuberculous -fluconazole -##ocus -correlating -##arboxylic -defines -osteogenesis -digestibility -commens -insecticide -##ecia -##anil -##endym -spectrometric -##aired -academ -abdom -##edullary -runs -wilcoxon -fascia -lengthening -801 -##oo -heightened -combinatorial -whenever -anxious -ssp -graves -dti -deprot -poland -provoked -posttreatment -##ynch -combustion -abi -ada -levodopa -endotracheal -electrolytes -modifiable -maximally -galectin -distinctly -hexagonal -0002 -throat -cid -lanth -multicentre -zol -outputs -ocul -proxy -lub -##rb -extravas -pediatr -parasympathetic -##acetamol -vv -ortho -intu -##oviral -partnership -contraceptives -xylose -pect -stains -fibrinolytic -hither -gastrocnemius -rins -willebrand -vincristine -encl -238 -categorical -wise -179 -214 -loh -gil -styles -resolving -semiquantit -sustainability -##adish -franc -governed -shortage -eut -##wi -##adesh -##analysis -hitherto -circuitry -versa -##ults -##oxidation -##asion -genders -unemploy -pollutant -isotropic -209 -necrops -cryptospor -leis -loosening -fellows -##ocyanine -bidirectional -associative -arthrop -##artum -unmod -stations -##ald -antimicrobials -hexane -seronegative -practicing -##uretics -meiosis -##osting -adopting -texas -nephrotoxicity -pine -restorative -pale -##tz -193 -tetrachlor -redundant -sustaining -furos -coincided -landmarks -dece -##tistic -tryptic -##ussion -##oconjug -vals -biotrans -ontology -##agglutination -##iodarone -generator -nash -vitrectomy -neuroscience -hypon -handic -oncological -glycolytic -hypertonic -dimerization -identifiable -exceeds -physiotherapy -regenerated -##onide -manufacturers -devoted -ligase -virological -rumin -artem -photochemical -##whel -hps -attraction -##west -##actam -psp -##trained -pheochrom -relatedness -feline -grp -benzodiazepines -complained -schistosomiasis -sio2 -##osyltransferase -hnscc -russ -carotenoids -lymphadenectomy -ultraf -generalization -##aro -click -splenocytes -##yal -spasm -chelating -nearest -##ariae -##side -rainfall -sst -##ateness -clp -nong -hypercap -anaplastic -##f3 -furosemide -microch -tlc -diamond -##obenzene -chose -intervertebral -airflow -proliferate -pupil -##une -eventual -phonological -##osperm -fle -synucle -permeabil -favorably -predispose -discordant -tma -caucasians -noradrenergic -reacts -drinks -##trac -herniation -official -overwhel -mmse -leisure -citiz -##kg -##ophor -metastas -horser -##ophthalm -pige -referring -prepro -streams -returning -##so4 -wait -och -concrete -heifers -microfil -diuretics -coexisting -##ume -esbl -carriage -toxoplasma -circumferential -raph -transthoracic -##odynamically -spines -##bra -homologues -vulv -compressed -rag -bifid -overdose -##osc -cleared -##aut -heterosexual -##otecan -nicotinamide -colonized -##erh -##ref -cyclization -internalized -beck -closest -233 -magnetization -broken -implicate -sarcoplasmic -compete -##pregn -facet -simulator -##83 -geometrical -p2y -horseradish -pmma -batteries -harbor -serology -##ophytes -##obacterial -huvecs -##umann -carcinogens -impregn -preferable -complementation -dcm -##omon -1990s -aperture -cardior -215 -paracetamol -empathy -concentric -outstanding -itp -transdermal -##eri -gard -dosimetry -intramedullary -myristate -regenerating -207 -contrasts -##oplasmosis -forec -myopathy -refuge -g4 -femt -attribute -hypotensive -barc -bq -nations -disadvantage -annotated -achieves -imidazole -navig -gingivalis -crosslinking -t1d -noncom -diol -##fas -##una -223 -urethra -##ofer -hindered -htt -resonances -fluoroscopy -cim -enum -hyaluronic -##oxycycl -phenytoin -realize -##erae -##urized -mptp -sleeping -epididymal -persistently -cdi -synuclein -##ostom -##alcoh -discipline -osmolality -linkages -contacted -toxicology -derivatization -harbour -##ometers -tubal -graduates -footpr -john -ost -##lt -##lyp -biomolecules -tram -##ubertal -toe -opposing -hbc -##iferous -intratum -natal -##oprol -tnfα -blots -cooking -clades -cep -receipt -diversion -##ophene -antral -antithrombin -request -reprogramming -awa -##ohem -cyn -gastroenteritis -glycosides -##ikrein -##101 -1200 -performs -increments -bell -17beta -##annab -221 -anesthesi -scrap -##took -skilled -stoichiometry -##ubercul -##bp1 -tunneling -locked -unsatisfactory -a23 -fic -css -dynamically -aberration -viscoelastic -308 -amplify -isoflav -antithrombotic -##urus -shelf -bronchoscopy -expresses -obstetrics -entrap -lncrna -adoptive -benth -snake -probiotics -agitation -n2o -slopes -wires -##adenosine -comprehensively -##avian -h3k -vire -migrated -myocarditis -211 -housed -hyperpolar -inr -unrecognized -pigments -bronchodil -##130 -##eb -340 -cd11b -##oeba -nitropr -hydatid -pvp -paradoxical -##ap1 -##othyron -##anter -nhe -##etrically -##aka -##entation -daughter -550 -correcting -##romatosis -mars -undertook -mitomycin -##pm -synthesizing -ces -nitropruss -cu2 -##encephalic -diminish -tables -##d6 -varicella -bronchitis -##len -lexical -palmitate -aborig -durable -##ochrome -muss -remediation -microscopically -nsaid -rsd -##iceal -dizz -asympt -10th -meetings -peanut -conduit -illicit -pparg -##ploys -chimpan -amiodarone -lycop -ligaments -losartan -ruth -rearing -nonsteroidal -ambly -qd -glutar -borrel -pva -kernel -remember -approaching -stir -shrimp -amaz -assesses -##odine -windows -broiler -sulfon -nitroprusside -centi -obscure -##avers -##ounder -##ogle -burning -constructing -##oride -qualities -leukotriene -slowed -polluted -ril -fret -institutes -194 -hbeag -cart -southeast -polysomn -dmd -##olide -vcam -hematoxylin -artificially -v2 -##implantation -##essional -prospects -ttp -alop -evans -stainless -seldom -uh -nanocomposite -phleb -terminology -##ott -##acyclin -##ibrate -##orthy -189 -inert -ccr5 -stenoses -##imolar -##okine -euro -##atases -hydrothermal -hardly -isr -##users -priori -wales -transs -##ystem -lux -buck -infecting -lhrh -##gest -290 -##etom -rendering -tape -##cortisone -##hai -autoantibody -1974 -##onary -enterobacteriaceae -029 -admix -##yelinating -glas -communicate -functionalization -rater -metaphase -yrs -phages -sweat -##dg -photod -relational -##erged -centred -requested -orific -tack -191 -unip -anastomoses -debil -226 -032 -hierarchy -diph -alleviated -ivc -ricketts -nitrous -nh3 -##anting -pheromone -autum -gan -##ologr -##timulatory -##ern -macaques -##ingual -economical -oncogenes -ca3 -emulsions -keratinocyte -a1c -extrahepatic -infin -membered -dissipation -ez -glycation -phytochemical -h2s -certif -##tigmine -thumb -##enafil -mentally -##icals -distilled -beij -electrop -discern -economy -nets -020 -pitfall -belg -anticonvulsant -##ensives -##agers -##ultural -##istin -##ounted -discriminative -##±3 -carol -draining -accommodate -unmodified -competencies -##itrate -employs -pyridyl -bioreactor -degran -momentum -d4 -argen -unusually -starts -roman -metabolically -lastly -##cv -effusions -fossil -##ecd -##ectants -advocated -rounds -contradictory -carers -microelectro -continuation -##bank -##itative -schiff -dissimilar -##han -looked -optimally -delineated -cime -zr -shad -metab -assimilation -##lich -meq -rgd -palladium -solv -preca -basilar -squared -microdialysis -glucuronide -lis -meningiomas -muscul -apr -nonsignificant -drives -psychiatrists -##vinyl -cones -chair -trehal -fuller -##plate -myd -##accum -nanocomposites -prohib -debated -incisors -unpredictable -stance -milieu -##trig -##uel -##ein -##erally -vad -ercp -antioxidative -capacitance -urod -##vic -subclavian -micronutr -unresolved -##omedial -optically -congruent -##apsular -cardiomyocyte -##olymer -##ifl -casual -disagre -##nb -astrocyte -##lass -cimetidine -seat -longitudinally -justify -##s1 -nanotube -sclc -cmax -ggt -##ylaxis -hur -phosphoinositi -upright -psc -reservoirs -stellate -castration -bcc -glc -sarcop -aflatoxin -adhere -1980s -confinement -##onent -##fully -##osylation -##rednis -workup -##usate -upa -concludes -weighed -##ulence -eczema -nasc -incentives -iner -##rotal -218 -turning -modulatory -abut -doxycycl -##yly -mdct -pick -hospice -putamen -justice -##aban -deteriorated -oligosaccharide -ltc -cognate -biocompatible -##adecan -grazing -##oxygenation -seroconversion -fatality -##ynucle -deceased -sprou -freund -staphylococci -eo -##olem -file -antimal -##years -##uin -printing -##oprim -disabling -cbs -##days -spons -hydrocortisone -confid -##kii -interch -anode -crystallography -##ellulose -ltr -wealth -went -anxiolytic -xx -merely -grouping -inhibin -misuse -fundament -calcine -##orel -##ceptive -##buminuria -hypercalc -irrele -iaa -##orhabd -positives -immobil -inflation -##usor -want -astigm -systemically -hemip -dav -unwanted -predators -descent -appropriateness -##tervention -dms -sterilization -trou -##utter -sars -clamping -demyelination -2s -hon -photons -proteoglycans -phasic -cooh -uridine -glutaraldehyde -4a -sliding -036 -mns -enterocol -antenna -##pv -nonsmok -timed -scaled -epc -workflow -taq -dollars -litterm -piece -denatured -045 -##guan -gtpase -pleiotropic -chow -prevailing -wanted -xylan -profoundly -##tory -thinning -shoots -alcoholics -exhal -enj -calcineurin -triad -fits -##600 -deployed -##ispers -##oons -games -310 -plasmonic -ccl4 -analyzes -fuzz -mutually -yearly -##oidy -##ieving -##omyosin -2n -aneurysmal -doxycycline -##angiogenic -##othyronine -heterochrom -names -crispr -metatars -mglur -hpr -adsorbent -##parametric -cholester -nebul -298 -ctr -varices -anaphylaxis -rfs -atherogenic -##gtt -temperate -locking -epidemics -epitheli -deafness -legion -4h -popularity -dizziness -##itz -fragmented -unfolded -protonated -##osystem -hyperplastic -lon -##ilyl -exchanger -abo -pst -##opancre -wav -##reek -254 -enantiomer -chloroplasts -haemophilus -oligodendrocytes -##muir -floral -avers -hydroxytryptamine -communicating -complexed -pieces -##ophenol -obstacle -instantaneous -##μg -alve -crosses -##fast -benefici -troch -esthetic -344 -methylprednis -xa -recanal -blastocysts -cystatin -waveguide -tamp -##aba -noxious -235 -##otomies -##apillary -langmuir -opg -jejuni -##ysts -authority -lyase -pharmacist -exaggerated -maternity -tympan -flame -langerh -aj -##oreactive -upward -a23187 -##othermal -dinucleotide -quadratic -relaxed -coils -observable -pds -##phig -ingredient -##azem -icsi -hemispheric -hemispheres -antine -pbp -chemoradiotherapy -##oxides -tyrosinase -parac -bps -##elective -victimization -communications -##drs -iddm -colp -##ecrosis -##angeal -cd10 -##iformis -##esters -allyl -switzer -burdens -acidity -chymotrypsin -adrenocortical -mastitis -villages -wherein -locate -exped -entrapment -derivation -mst -callosum -c8 -##rem -tremendous -haemolytic -snow -stopping -##xime -langerhans -centros -pemphig -budget -t7 -psd -##ocardial -qtc -oropharyngeal -##coagul -catabolic -switzerland -antinociceptive -diagram -reflective -cyp3a4 -rejected -facts -quadrant -β2 -psychotropic -pna -##itazone -irrelevant -persisting -cfa -polyamine -mastic -##iro -cyanobacteria -zn2 -fusions -transactivation -cathode -qi -astigmatism -##trum -pruritus -##osse -granulomas -##imers -stigm -meningioma -std -046 -##oresistance -031 -perineal -spectrophotometric -verm -##orr -ligated -familiarity -efs -##foot -anabolic -miscar -s6 -nab -ssi -zik -responds -electromyography -##pharm -computationally -undesirable -viz -##ysm -##rett -conservatively -listening -exposing -##adjusted -prokaryotic -sdf -##at1 -##arf -florida -hypocal -histones -mz -dihydrop -aunps -##erus -campaigns -prostacyclin -##ethal -threats -ctla -concentrates -clad -ecules -exponentially -zona -pond -methylprednisolone -junior -turns -##r3 -immigrant -circle -hirs -evolutionarily -cilia -##assemb -copyr -vasospasm -imported -corneas -gonadotroph -hill -endorphin -##antigen -##entgen -sirt1 -##accept -readiness -dxa -passages -##tina -bacteriological -probed -##hou -micelle -pco2 -alloys -2019 -##h4 -##isco -##acid -##81 -theme -neuroprotection -brings -curing -dht -actuarial -meso -irel -neiss -##olization -##board -037 -alkaloid -311 -homologs -##gow -nominal -fasl -manure -beijing -police -##iciting -refraction -trast -##dine -myoglobin -copyright -spirometry -##rodesis -valine -##03 -bmscs -ireland -prematurity -temporomandibular -lvh -insecticides -nystag -myotub -dysplastic -matches -establishes -retinoblastoma -##enet -emuls -humeral -e6 -machines -slide -##tists -agglutinin -antid -corrections -cm3 -discriminated -herm -aptamer -homod -chondrocyte -autumn -##tiazem -375 -keratitis -##esthesia -rw -unic -ipf -quartz -optics -releases -fumig -traces -tee -ch4 -##serine -213 -asians -electrosp -aspartic -##beta1 -frank -demyelinating -hemostatic -applies -comments -##opharmaceu -034 -olanz -##ao -discoveries -dorsolateral -alongside -##breeding -posed -defl -sao -##hydryl -##ropic -ancestry -separations -gang -behave -cages -segmented -acinet -cholestasis -merits -multilayer -carcinoid -agnps -monod -##ocations -nanofibers -##arctic -carotenoid -##ureth -distension -##oglobulin -##ront -##bryonic -enal -vow -ministry -neuropeptides -listeners -saa -stringent -metabolomics -038 -trastuzumab -olanzapine -≥1 -destin -zeol -hav -unlabeled -##child -cholerae -1973 -mmr -palatal -tumoral -fuzzy -##dy -##issible -fibrinolysis -##imidazole -distract -##ouses -bibli -kyn -##transp -homogenate -mch -acquiring -activin -synch -##uccin -speck -##biased -dibut -congestion -alopecia -bee -trimethoprim -relieve -dlbcl -##haem -dilute -rhodamine -##havi -subn -unil -obt -wildlife -gonorrh -rubella -cina -##obronchial -hypoxemia -##phosphatidyl -##biotic -spending -modal -##x3 -ceft -noticeable -explanatory -tcs -haemodialysis -myasth -guest -##vin -potently -aas -ferro -033 -reads -screens -diltiazem -ruthenium -asynchron -tort -##otherapies -zoonotic -protozo -hybridized -plating -hco3 -bland -dermatology -simpler -grounded -acinetobacter -citrus -pace -inequality -intraoperatively -##ymm -##arcomas -glasgow -ppp -mcm -cyano -logic -cardioresp -neurochemical -urokinase -stalk -##oprolol -melanocytes -malate -photoluminescence -##my -specialties -dk -tmd -unresectable -translocations -insec -wavelet -##umber -048 -risperidone -cardiorespiratory -sinusitis -pvn -pumping -convinc -predation -##ugr -organelle -solvation -cystectomy -##ipenem -subma -chemoattractant -##can -ddt -calorie -exhaus -##prop -##omorphine -flatten -vertigo -congeners -roentgen -##osteal -4000 -anonymous -exploiting -kan -apd -sentence -##angitis -##imeric -hypoglycemic -pbc -ell -transparency -dpph -extravasation -terminally -ipa -tace -dnp -##etomidine -argument -gabaa -loose -unbiased -232 -pacap -vz -norweg -##atia -tpo -icg -arthroscopy -aca -organizing -insemination -##imumab -marks -formyl -saturable -exchanges -##illed -developmentally -hematuria -asphyx -responder -xer -implied -##isen -enthalpy -neoin -aband -##ila -appreciated -tmp -presump -##abl -bioge -biotic -hyperint -cystein -##eto -fluency -bent -probands -lymphoproliferative -##renorphine -photosensitiz -orex -valley -lov -amil -224 -quadrupole -##cnts -isothi -avenues -##olaryng -##omyelin -hemic -permissive -connect -intracereb -ripening -crust -termini -reside -##repres -membership -hyperpolarization -quoti -aluminium -##oproteins -isozymes -coincident -nystagmus -##ermined -stimulant -omeprazole -fcr -superimposed -turkish -sre -rises -##openic -maxilla -anf -lipolysis -corrects -##etitive -vo2max -##osts -hindlimb -##chemic -unresponsive -##indole -chyl -nanoshe -photocatal -subspecies -sativa -undiagnosed -shrna -asymptotic -cau -endarter -dibenz -contributors -##ho -argon -##wood -nw -basophil -seeding -420 -subtotal -slaughter -unstimulated -unselected -cochlea -tia -callus -s3 -##gers -##insically -##ocere -phosphates -graphite -poster -apa -octan -pursuit -##atide -##ithin -habitu -estrus -granulation -wasting -unsp -diary -influential -sz -##inoids -cx43 -inflow -exocrine -hypok -tp53 -complemented -habitual -athle -speakers -##diagnosis -##ospinal -carrag -##flora -verb -benzoyl -##iplatin -immunod -pharmacologically -sows -naphthalene -aboriginal -azide -aerial -microbiology -uva -decide -aphasia -hub -carbapenem -phthalate -prod -iodo -intrag -recurred -compelling -phac -xps -biceps -streptococcal -pao2 -gavage -detrusor -fasci -enalapril -regi -nonsmokers -intraluminal -pmns -##ollagen -barrel -elicits -detects -##oposterior -mate -1950 -pseudoaneurys -##eor -orph -##kat -fried -weighting -propos -pps -manager -eliciting -cannula -##tert -neurobiological -enjoy -powered -camer -##anar -buds -047 -photoreceptors -pipeline -##iry -childbirth -##imentary -##orbital -finnish -pgp -##adiaz -butanol -##iiod -capsul -autistic -##oacetic -650 -conspic -innovations -sulfated -dissatisfaction -manipulate -interrelations -xy -octre -downward -reapp -carrageen -nitrog -##othione -mimetic -reef -blend -##car -anaerob -mansoni -truly -ignored -parturition -##acral -chemosens -strictures -polyvinyl -fischer -traced -osteoblastic -at1 -realization -amiloride -msa -curved -solutes -escalation -comet -##abetic -##phone -cryopreserved -dialysate -uniformity -##mar -extraord -##urface -attempting -##obiliary -destroyed -##lycerid -##oglut -underscore -triiod -haploid -durability -pdms -caregiving -overcoming -infrequently -c7 -kallikrein -tetrap -biotransformation -stimulator -lysate -scarc -thrombi -antich -contrasted -femtosecond -tolerate -epoxide -getting -tracers -glucosamine -commentary -hologr -peptidase -##opterin -bronchiol -porphyr -myocl -contracture -lacrimal -densit -ccc -##ophyte -walled -sentences -dub -##mph -##ismus -citric -mx -cinahl -kras -estrous -antiinflammatory -embedding -##annel -nascent -contiguous -huvec -barrett -ambig -oxaliplatin -##admin -phyto -equivalence -virions -039 -gwas -≥2 -compliant -accumulates -##reen -dobut -##king -tuberc -##odynia -odn -academy -tz -##d4 -365 -actinomyc -seventh -urological -parkinsonism -##itical -cylinder -hydrated -laev -bleaching -hatch -inducers -baumann -##roventricular -micellar -##6a -microinjection -vascularization -accomplish -##osseous -217 -monolith -imperf -##western -anca -g0 -maxillofacial -unadjusted -divisions -meniscus -unaccept -waveforms -niddm -extant -integrates -manufacturer -ops -##tices -rtms -sab -zip -rls -exo -##entioned -##gly -ultrac -pegylated -thorax -##rum -formerly -sulfhydryl -yb -##ozoites -norwegian -239 -dobutamine -fibromy -iugr -kines -hay -stressor -pparγ -##urge -opa -semiquantitative -##eping -cch -palpable -##mb -pfge -universities -intracerebroventricular -locoreg -infarcts -stereotyp -moderated -laying -argues -submaximal -tig -sati -apomorphine -geor -ppt -gob -fission -tps -homogenous -pentobarbital -e7 -dnmt -genotoxicity -accelerating -metagen -##elt -reposition -chile -ultrafiltration -story -radon -##osylated -sphingomyelin -aseptic -bridged -tracheostomy -raphe -5a -submandibular -hispan -mcg -popliteal -imping -mainst -##isy -equilibr -##orespons -pellet -##unts -cultivar -unload -##erts -##biosis -nil -anchoring -ideally -localisation -dichotom -##feri -amphiphilic -micromolar -advised -thicknesses -##encephalon -hypogonad -##ronic -azath -##olstein -candidiasis -fviii -lmp -csc -##izability -accommodation -nonresp -mace -storm -##opar -##aks -microbe -npv -c16 -faeces -hydroxybut -ptca -hematopoiesis -chlorhex -##ype -paramount -##lorinated -380 -deeply -##illes -cq -##ighter -s1p -t0 -##igan -thermost -cnts -##ildenafil -##alcoholic -thf -opto -mantle -directors -imprinting -herbs -275 -##uis -responsibilities -mn2 -topographic -bees -wisc -3b -radiolig -nitroso -epidermidis -carib -immunologically -nnos -##gp -marriage -ptb -hypoglycaemia -gay -oxidored -propria -sink -chromophore -##ynyl -incompatible -azathiop -solubilization -rarity -neuroh -7th -neisseria -crime -localised -rebound -fon -irf -mailed -hcm -seedling -brit -pml -oscillator -heterodimer -neutr -biotechnology -gase -neurovascular -naming -tga -transesophageal -tubul -flick -reactors -photoin -reinforcing -perovskite -chlorhexidine -wish -oncologic -extracting -injectable -fecund -neurology -polyposis -gef -royal -initiates -retire -zw -favoring -demethyl -##oniaz -iranian -patterned -kyph -continence -##quinoline -##azoline -phosphorylase -bangl -##ths -achievable -dor -vagina -urgency -infestation -##reless -370 -##ouns -immunofluorescent -aneuploidy -periodically -deae -bangladesh -abortions -transmural -prazosin -##okines -oldest -anatomically -subdural -pathophysiologic -buffalo -midwives -hypertrig -##anone -manipulating -##eliness -biodistribution -millions -smd -intrinsically -480 -mites -hypoc -##inflammation -cem -aspergill -cushing -transients -contracted -##bec -digest -pitfalls -sildenafil -electrochem -hnf -uranium -colored -##i1 -adn -notew -counteract -electromyographic -piezo -instrumented -spiking -humor -pgs -##oton -sos -logical -##okinin -inferences -acetylated -avoids -##iet -spliced -identities -##oflavin -biochemically -nadp -inductive -supplies -debilitating -biomechan -tertile -bmc -nitroglycer -##methylation -maneuver -##pb -drawbacks -preg -homosexual -cholangitis -sider -##torh -thiored -228 -estrogenic -compromising -ed50 -presumptive -kcn -evacuation -convex -prrs -##ultured -fractal -defensive -indeterm -cadavers -photosystem -crush -isoniaz -##oduoden -sonic -##osi -annulus -azathioprine -baumannii -knowing -scru -denaturing -butter -diat -lymphangi -burgd -##κb -ict -ependym -seemingly -##uronium -balf -unbound -adapting -cba -##azin -phenotypically -controller -adenylyl -##omib -sorafenib -hypophys -lyme -##operfusion -radiologist -wrong -licensed -transvers -vsmc -clip -baroreflex -niv -tpp -runoff -abduction -burgdor -##ellin -heterocyclic -hydrolytic -repolarization -caen -##ogastric -prag -merit -gj -phosphatases -corners -ptd -pgi2 -##etus -manufactured -intensified -metabolizing -claimed -hypercalcemia -##omening -unheal -wireless -uncoupling -##centr -##endothelial -pach -##h2o -noteworthy -holstein -mof -google -##ucine -dichloro -cryptic -##orptive -potentiate -##yer -unco -ags -penal -relied -vsmcs -leucocyte -investigator -##professional -poc -beneath -sulcus -##bw -interconnected -nrs -pleomorphic -oligomeric -bacl -globular -burgdorferi -jak2 -##orhabditis -pns -transurethral -naa -calcifications -sulfoxide -n3 -degranulation -##ervical -court -heterozygotes -##db -##access -245 -hypertriglycerid -boots -characterisation -##adol -##rology -lett -meter -##cysteine -3beta -nci -mbl -shortest -##folate -isomerization -pyrophosphate -precipitate -progressing -tumorigenic -so2 -b3lyp -##aea -fetoprotein -##transplantation -tailed -##bear -purpura -trehalose -mission -##trid -dihedral -swab -##omethane -hcs -00001 -##trex -dac -utilisation -##ennial -##ostosis -budding -buprenorphine -lpr -##otap -glycated -ale -amend -occlusions -ki67 -geographically -spasticity -your -sexuality -underpinning -incisor -reinforce -chronological -##ran -##immune -phantoms -##1a2 -##arth -rivers -thanks -demonstrable -##etries -##sm -adjuv -hyponatre -berg -thyroiditis -agn -guarantee -singular -proto -commiss -##uma -mlc -biv -gaseous -vascularized -myelodys -##urities -classifying -connexin -visiting -ahi -rhoa -vivax -polyt -##af1 -fak -##idines -tan -042 -dystrophin -inefficient -##180 -phenomenological -recapit -roi -palmitoyl -##cription -pld -appeal -219 -convey -isoniazid -dysregulated -facets -assignments -svm -anteroposterior -##odin -transposon -##amyl -pgc -allop -washington -##ovi -rheological -##cel -club -embolic -##arabine -website -ay -k2 -halo -homing -##orient -collim -##rogenesis -serovar -ryan -consume -delineation -##ivirus -necropsy -##rosthetic -##ospermia -##burn -fabricate -euc -grounds -permitting -player -enzymatically -sax -ish -novelty -ssr -##iper -tut -instructed -village -##arche -unexplored -styrene -##balanced -##q11 -artemis -discontinuous -histopathologically -resins -symbiotic -peep -##ondral -categorization -transv -##urrent -043 -##children -anaesthetized -stillbirth -streptomycin -postp -afterwards -isothermal -##00000000 -valgus -sjogr -##bv -##n3 -genbank -ryr -##ectoris -nonex -234 -leucocytes -externally -abundances -##atids -pept -joining -nyha -##yled -achilles -pvr -neuraminidase -grape -urticaria -##ourished -certainly -##ometabolic -##ematous -microglobulin -taught -aversive -multicellular -##293 -##romegal -##osable -involunt -##activated -cd56 -##esin -albuminuria -contraindications -relieved -eliminates -##lc -architectures -certification -nicu -keyword -mpp -##ffer -transcriptionally -orifice -invading -cannulation -v3 -synchro -heritable -negoti -chat -esophagitis -##ensory -hgh -broadband -hydroxymethyl -ensures -tenderness -ncr -calcane -endodontic -sps -##ca1 -phosphatidylserine -sjogren -6th -cd19 -api -acetaldehyde -visibility -241 -shh -stomatal -octreotide -dephosphorylation -prick -hospitalisation -auxiliary -##igenin -rit -incar -innerv -neuromod -affordable -nanorods -hun -immunosuppress -thai -forth -##ochondral -perfusate -##tri -stn -acclimation -vitell -avr -##ocortex -scrutin -bats -##onol -midgut -bottlen -mmt -sensorineural -heel -telemedicine -aggressiveness -##radi -isoenzyme -incap -##being -glucoside -5s -coul -aii -##ternal -classroom -##oting -immunoassays -microorganism -directing -morphometry -urease -diphther -myoblasts -##opolymer -trpv1 -rotations -postulate -##acyt -news -locoregional -breakfast -sulfonyl -frustr -049 -progresses -mesoderm -##osclerosis -##hom -valued -ppargamma -cyto -##ollen -fibril -##ells -##actyl -epcs -##elong -calculi -prepubertal -proceeded -hernias -invertebrates -pakistan -##map -tetramethyl -bacul -immortalized -wellbeing -tdcs -biosensors -omp -xp -##amoeba -taxonomy -hispanics -vene -bpm -##onins -pectoris -hedge -##owed -deuterium -decel -subdiv -ip3 -fimbr -##d3 -nitrophenyl -thall -slip -sant -maxima -##zomib -##othi -229 -##hog -deaminase -cb1 -ambul -miniature -conventionally -imipenem -interferes -##verted -magnitudes -##tress -triterp -fts -##omplex -conclusive -wd -##avicular -adhered -clarithromycin -polyg -##ucting -lactis -rage -##porter -1966 -##fluorescence -laevis -##pot -##osh -molecularly -rectang -ihd -vasoconstrictor -##ivudine -mcd -kilob -ancestor -quantitated -polychlorinated -dates -whey -3β -2m -phosphoinositide -amni -pheochromocytoma -golden -approx -##fts -dosed -exf -say -##orylated -ebs -minnes -##amido -predefined -##cc1 -penetrate -spondylitis -reliance -terp -anoxic -jew -adrenalectomy -##reatment -acrylamide -##azide -tbars -##lock -titre -##othec -offenders -sunlight -##position -practiced -rectif -iap -synchrotron -##itil -huntington -entrance -modelled -therapeutically -##apore -microsurgical -overwhelming -##itably -myocyte -##kl -ricin -arctic -dsp -##937 -beating -biotinylated -p50 -##endazole -tst -penis -##electroly -default -jurkat -20th -interrog -irrig -dipyrid -antiphosph -##coprotein -panels -migrants -cardio -ploidy -##kinetic -##erson -s100a -cryptosporidium -lactofer -replicates -framing -myelinated -lysophosph -phosphatidylethanolamine -caenorhabditis -fap -##isk -glycaemic -##rote -retirement -##apto -sons -orches -sclerosing -ssa -citizens -equivocal -041 -crystallin -amoe -compiled -isotonic -##delta -progestin -##lessness -editor -acromegal -radiosurgery -bdi -og -accessed -##retion -minnesota -money -bca -depicted -fingerprint -mindfulness -rug -restrict -##mics -borders -carolina -displacements -heavier -mdm2 -gss -deterministic -lubric -extensions -athletic -dentition -##tiveness -beneficiaries -nephron -hypnotic -caf -myb -promptly -##orphism -dmf -bulky -modifies -confluent -##osities -##itrite -ogtt -epithelioid -vzv -mmf -tones -controllable -##olinergic -avium -fibrillary -glucone -##ographies -drastic -anteced -pdi -overlying -degenerate -fibromyalgia -viremia -qa -fragility -##ophytic -mta -##occlusion -cour -ceftr -236 -negativity -sectioned -mdma -defenses -flavor -laminar -mainstay -facilitators -mine -##2r -urodynamic -ner -forel -nadir -##cryst -clath -17a -sedative -compass -##missions -bioassays -fluorine -ginseng -tampon -ccd -stump -mthfr -ui -universally -gynecological -indexed -nf1 -entang -serially -248 -agend -ceftriax -urs -endarterectomy -pleu -hypothermic -administer -fluidity -riboflavin -interspecific -thioredoxin -extubation -##onecrosis -enhancers -igh -herbicide -town -draft -floating -acknowledged -##alasin -kre -f3 -matr -nonrandom -amalg -##acholine -originates -immersed -gm1 -mehg -eigen -replacements -carboxylase -##ifferentiation -##ystitis -youths -endocardial -##ostigmine -cycload -nim -lactone -##opram -corroborated -egr -##fv -supervis -##romed -##odomain -arte -enterica -##3t3 -antidiabetic -##behavi -sid -mbc -impurities -adenoviral -##oproph -indeterminate -perone -strugg -landf -pear -unavail -lyophil -pant -##arius -refine -syringe -##oning -##rice -##italopram -cory -hapt -pthr -antimalarial -##arded -amh -##welling -ngs -adventi -##tite -usp -##afted -325 -rps -perpetr -caspases -appreciation -genu -strab -hypermethylation -##idian -juxt -##s2 -kup -carbonic -tracked -kiss -meteor -intratumoral -accreditation -##antes -trying -preload -perforated -party -blasts -quartiles -fractured -disinf -singapore -antitr -ductus -nonhuman -dioxin -cholecystitis -repairs -door -lifelong -aro -swollen -modifier -proceeding -hba -mos2 -ceftriaxone -humerus -offs -mutagenicity -mediastinum -potencies -willi -ht1a -latino -estimations -diffusivity -##atial -condyle -##tilb -##oronary -fcs -elaborate -abbre -weaknesses -b4 -cus -proprio -##ureter -##anserin -craving -preparative -transfused -cecal -##n4 -##individual -##tigens -saccades -kyoto -racemic -cholecystokinin -sorted -buffers -cach -impr -metacarp -balancing -##±4 -phylogenetically -johns -interphase -scleral -inception -##stalk -markets -isoenzymes -dsdna -sgc -p75 -##aploid -otolaryng -asthmatics -landmark -exceptions -undetermined -tos -readmissions -##ohep -nanotechn -macron -##baric -explant -prehospital -demethylation -sulfameth -##ih -quadruplex -##acoustic -unprot -exercised -hydroxyvitamin -abbrevi -##jd -seeks -encephalomyelitis -duplicate -protonation -##iors -glasses -victor -##bos -synchrony -243 -coadmin -fra -shigella -##ocalization -tcc -neurofil -aforem -metallothione -atria -liability -aforementioned -angio -030 -odc -kenya -microflora -mdr1 -coales -anoph -##agal -duck -heights -maltreatment -marm -cholangiocarcinoma -##antigens -##oporphyrin -##ethane -044 -amyotrophic -##entilation -retinoid -1972 -pwv -oncologists -mend -##ypti -##ocentesis -colomb -mvd -hedgehog -##win -##ostal -hyaluronan -hallucinations -cdh -appendix -nanog -131i -lacun -rbp -##rowing -4b -orthostatic -gus -beverage -227 -##apical -gynecology -cholinesterase -homozygotes -##odyst -yers -15n -##126 -incarcer -fbs -rer -##ylv -telev -concerted -##adaptive -powders -hyponatremia -zh -cords -interindividual -shunting -vigilance -audio -astrogl -ferrous -242 -tall -brucella -hib -thiamine -exceptionally -1ra -viscous -symbion -flare -##oplegia -proficiency -keratocon -##enoids -##the -stear -ringer -unhealthy -##orbed -incidentally -hots -##aters -quail -gp120 -##arterial -byst -alkylation -gdnf -ionized -##alkyl -##onscious -##apsules -##nu -##erule -##anolic -cholesteryl -nitroglycerin -rolling -##aginous -petrole -pnp -##cement -humic -hyperoxia -pga -fluctuating -tfs -coiled -##ovulatory -rainbow -##iaa -spermine -exploitation -meniscal -##standing -emerges -tsa -log10 -innervated -inactivating -microalbuminuria -attenuating -bootstr -spectrophotometry -broadening -flush -##burgh -##duced -accelerates -##men -archaea -##renic -partnerships -##oremed -fecundity -##ospatial -levo -nucleophilic -lrp -##506 -cinnam -luminance -alfa -subgen -lacc -adjuvants -252 -ici -pread -seropositivity -##bearing -fron -nonl -hyperbaric -cardiogenic -cmt -##iclovir -cardioprotective -##hard -##critical -osas -inactivity -bottle -azithromycin -transcriptomic -scleroderma -nomen -advocate -hourly -stokes -##opathogenic -incisions -errone -cyp2d6 -carboxymethyl -clarity -astrocytic -neocortex -bacteroides -adenomatous -cci -recanalization -disks -densely -##olipin -lfa -nanowire -dexmed -methacholine -tetramer -petroleum -asymmetrical -fibrillar -##eight -inaccurate -exhaled -1970s -##othermic -fe3o4 -clind -ketones -runners -pcm -magnification -vacuolar -dipolar -antarctic -cine -synonymous -hydroxyproline -prevalences -pragmatic -hysteresis -triangular -glur -shortcom -languages -crosstalk -syntheses -polyneu -shortcomings -assault -##ecies -rheumatology -##arietal -acetylgluc -elaborated -bend -dcis -mesenchym -mechanistically -##exy -replicating -hcy -thicker -temporarily -dyskinesia -dexmedetomidine -chb -intraves -shuttle -viewpoint -privacy -sarcopenia -decis -postex -tgfβ -##oblastomas -eur -imipramine -##reshold -cytolytic -cargo -≥3 -##cb -ictal -##igel -stoichiometric -##ongru -lncap -cd1 -multifacet -sns -thiols -misdiagnosed -antiphospholipid -trus -furn -255 -##ecium -fulm -mitigation -benzoate -##116 -rnfl -u937 -acceptors -11c -anchorage -bisphosphonates -involuntary -rx -overproduction -ryanodine -chagas -absorbing -specialization -##azid -calv -multiplicity -##riers -##etting -breakage -shielding -formulate -questionable -##outs -clindamycin -edible -pione -##agle -metamorph -advancements -##trexone -nsp -actinomycin -ipr -crowns -implicating -dentine -recalc -demineral -celecoxib -##ytoplasmic -singly -quotient -soccer -mold -internally -radioiod -multifaceted -isth -operates -electronics -oocysts -##epi -##ohyd -externalizing -comfortable -russian -nymph -epilept -##architect -sce -##erea -aedes -nms -dehis -jointly -syncytial -infinite -endos -isozyme -##osmotic -covariate -isomerase -manufacture -##esartan -259 -##onated -casting -brand -schistosoma -fumigatus -bifunctional -##fused -littermates -0003 -photolysis -crab -ohda -##ifolia -strengthened -darkness -michaelis -imperfect -polyaden -rech -##inately -aerosols -##eland -mushroom -##cessing -pressing -vagus -snails -##h3 -##nts -265 -multiplication -gamet -##astigotes -iri -inductively -##ivial -##iptyline -condylar -plp -gig -##rotoxin -columbia -ncc -shel -intervening -nomencl -pend -borte -##reported -parall -tricyclic -cbp -suspect -meningococcal -peptidoglycan -tetrod -##fu -##acological -c1q -##aval -incongru -phagocytes -hac -emergencies -denitrification -bortezomib -##lational -neuroleptic -peroxyn -endings -interdep -cst -diacylglycerol -simian -enth -biob -1s -transvaginal -fourfold -blockage -retains -lactoferrin -nem -neuropathological -neuroinflammation -dyslex -dsrna -triiodothyronine -##oagulation -##icus -coordinating -hbo -nut -##itidine -underweight -baclofen -chances -preparedness -##ealth -phal -telomeric -concussion -##junctiv -##eno -##atography -gor -juveniles -polish -neurosurgery -deacetylase -accessions -##port -##erves -flocc -cognitively -bronchiolitis -deactivation -exudate -periodicals -multitude -fenes -metoprolol -edent -fixing -##rad -reassess -cd133 -renders -pdc -duplicated -##adate -##odone -251 -liz -##ituric -##hydroxy -##akia -pork -claud -f4 -##apheresis -##inesis -nucleosome -spt -denv -flip -accessing -epididymis -rgo -dce -tj -cbz -##tinib -senile -quiet -##etermined -fulminant -ovip -ultrafast -nonalcoholic -narcotic -dating -##adecyl -266 -nmdar -misle -vibrations -usability -##capes -melt -neurobehavi -startle -tetrodotoxin -save -monocytic -villous -237 -gratings -hemangioma -disparate -perturbed -discourse -partum -specify -rooms -formate -dentist -scopus -##ancerous -nanosheets -sis -allergies -polyamines -##lm -researches -hca -characterizes -transducers -vocabular -emit -predominated -##meter -facs -iud -biomaterial -qsar -relying -aspergillosis -encompasses -##menting -unrel -##oremediation -lithotr -capturing -6m -suite -cort -curricula -##alog -cvs -naoh -dilemma -315 -shocks -sublethal -transist -crass -laid -##graphs -cacl -##opharynx -grows -hydron -laterally -underlies -anesthesiologists -carrageenan -##opically -chlorinated -enantioselective -segregated -arthrodesis -depolarizing -scotland -charco -shunts -##transplant -##1p -vortex -arteriography -thawing -polyelectroly -##inum -conspicuous -jh -sight -rcbf -booster -triacylglycerol -immunoreg -rhodopsin -##orrhizal -toxoplasmosis -lamivudine -##ivir -speculated -##romazine -projecting -argentina -dma -indones -lmwh -##avidin -spaced -pedigree -farming -bubbles -cancellous -convulsions -belt -##edoch -endocytic -cyp1a1 -##ophyt -excurs -denervated -weaned -phr -monitors -adamts -##uer -wakefulness -##rosterone -circums -pediatricians -pdl -myasthenia -lind -spill -wheezing -columnar -glucuronidase -territories -##arrhythm -nanomolar -causality -##oven -colectomy -hemorrhages -ghz -##5b -inbreeding -##ellites -##ectant -suckling -neuroanat -infra -##omponent -absc -vasodilatation -##tins -pav -##omel -milder -myotubes -##alic -##butamol -wg -soldi -torr -aegypti -cscs -affords -##ht -hypercapnia -tamponade -nonadh -supras -vign -##encl -##space -physico -zirconia -pthrp -intentional -kev -pediatrics -##lete -valproate -syll -thawed -##thio -##quat -paa -inflammasome -##lymph -irritable -ther -calving -##edral -bimodal -pneumonitis -##eic -##olon -globe -sms -allodynia -fura -##opeptides -##ogloss -##uters -ipc -inulin -genit -equator -msv -thoracoscopic -##rolateral -246 -rrs -conceived -reentr -##yletic -unprotected -dione -##oty -qaly -##ocs -##sil -stands -salbutamol -##pregnant -peroxynitrite -amazon -nbs -hydroxyethyl -hemophilia -myopic -microvessels -street -initiator -obligate -television -##idosis -counsel -##oide -##axanthin -##eme -overestimated -nord -249 -##ectivities -thyrotropin -recovering -qtls -earthqu -internalizing -##onus -##liptin -bottleneck -trapez -mvc -040 -cld -plasmacyt -2500 -tung -diamin -evoke -nucleolar -fundamentally -repeating -transformants -caribbean -##wl -wastes -##equencing -arthro -cd38 -dystrophic -vg -invariably -caveolin -##itizing -charcoal -unravel -delaying -sock -244 -formats -pyrolysis -emin -psii -bpy -cpm -hek293 -captive -shells -schoolchildren -cardiometabolic -##7a -tars -empowerment -##agenic -extrapolation -itrac -amik -equity -295 -query -eicos -tetrazolium -##qs -##⁻¹ -##coming -##body -canopy -##har -tympanic -insults -diesel -pteryg -vertically -intradermal -appreci -spheroids -ddp -##cephaly -18s -duplications -preoptic -##arbox -247 -kupffer -mercapt -##enclamide -irin -hch -##onsin -cpe -recirc -tata -emboli -protozoan -heterotopic -certainty -##epsia -253 -##anate -pretest -##utres -thermophilic -440 -abstraction -telomeres -vegfr -##onders -adenosyl -intragastric -cycloaddition -spm -##orescence -mercapto -alumina -tentatively -advocacy -gk -pnd -##vhd -stays -temperament -compress -##amili -bullying -unfavour -habituation -##retinal -emr -##ocholine -substitutes -retail -confidential -##iso -paraventricular -scopol -wal -conspecific -completeness -antihist -characterise -catastrophic -excip -deoxyribonucle -ceftazid -glib -orbitals -lipof -##atur -transcutaneous -##che -precautions -kinin -minimization -formic -dyspepsia -##eptor -itraconazole -ramp -bisphosphate -parag -phenols -peru -odontogenic -profit -fluorodeoxy -##worth -tpn -reversibility -hil -extras -infiltrated -##tring -nes -senescent -autogenous -erps -naltrexone -##344 -##fi -capd -radioligand -contours -affairs -altman -##enolol -dehiscence -synovi -loud -rs10 -assumes -fms -amikacin -irinotecan -smcs -cephalosporins -conferring -insula -mismatched -oxidoreductase -concert -##ysteroid -restricting -redundancy -##ionyl -sorbitol -dislocations -##ryl -##lers -cereal -##lingual -bart -##roke -igt -queens -nipple -buffering -scalable -##oneu -permanently -chaotic -ganglioside -##iaxial -oxygenated -compositional -inclusive -hypothyroid -internationally -farnes -thinner -gulf -crystallinity -##hb -placentas -cartrid -wisconsin -##famide -##oprophylaxis -wheelch -recalled -alkylating -activations -electroporation -bival -cob -bras -lrr -convincing -ld50 -antifer -intensively -dpi -##nn -athlete -##obutyl -cystitis -haemorrhagic -hilar -##reach -ceftazidime -splanch -exhaustive -1971 -myelopathy -vitil -##tiform -##atri -massage -##a3 -administrators -cpc -##rey -polyureth -autophagic -pgr -divide -glue -rts -vta -deposit -##ibrils -fork -##ll -##ycholic -defibrillator -orfs -threatened -leukemias -appointment -ira -merist -isc -cape -myr -cytostatic -videos -agen -cohen -northeast -dash -informatics -malaysia -governance -kainate -adnex -bears -dihydroxyvitamin -capping -replicative -uracil -bcva -villi -ats -cyclopent -rsa -proposals -goblet -papilla -##amole -bulls -##oethylene -castrated -illumina -oint -oesophagus -##athin -spouses -trains -pdac -spider -lowers -ownership -##l3 -decont -craniotomy -disproportionately -2p -mph -crowding -##osarcomas -##cedural -pvc -chelation -##itism -invertebrate -##fm -ipt -transferring -pge1 -scfv -olt -proteinases -##ums -hscr -pae -isotype -favors -rantes -268 -decidual -unfamili -immunology -atopy -typed -##econds -tie -lift -glut4 -smr -ille -diphosph -##olamine -##angiect -humid -hyperinsulinemia -biochar -cervic -##factorily -##romal -atenolol -attrition -##aciens -converts -faith -morris -vaccinia -ethoxy -nq -##junction -moves -dispersions -fertilized -periventricular -parat -reca -nonsurgical -257 -borrelia -nonpregnant -##scat -##gener -modestly -customized -scopolamine -##kc -cheap -issued -electrophysiologic -c12 -305 -bicycle -dwarf -isothiocyanate -sphingosine -estimator -discriminatory -choledoch -intergenic -dentures -ankylosing -##uristic -requests -aquap -dyads -##eav -isole -unaw -polyneuropathy -rds -278 -taxol -casts -anticipate -posttransplant -aminoglycoside -arabia -##americ -batches -adopts -##electroph -survivorship -##enedione -interprofessional -opc -taqman -corrective -accord -##aparin -waals -losing -##cast -cautious -##osynthesis -lan -micrometer -##factors -microliters -##rolithiasis -endosomes -knot -miscarriage -kaposi -258 -##olytica -nachr -macrop -arteriolar -anticholinergic -umb -staffing -marketed -sirnas -computations -renewed -mesenchyme -##enne -vagotomy -##ocratic -employee -linol -ethylenedi -speckle -##ifos -apgar -regenerate -rpl -spermidine -retinas -fent -##atechin -dipyridamole -egta -outreach -##onis -exacerbate -correspondingly -chalc -jack -lifting -##planned -nef -incentive -##achus -anthracene -framesh -stripping -##tip -##oxine -273 -410 -##ventive -gastrop -##enched -nonparametric -matured -fault -stm -saccade -##uronal -##asure -piezoelectric -amalgam -##ithm -esd -virion -buried -propagating -##opyranosyl -nomenclature -jas -bcl2 -noncon -proband -breakpoints -prices -miniat -285 -pectoral -microdiss -##eca -45ca -##gesterone -thiobarb -disruptive -macaque -coeliac -oligodendrocyte -cereus -cd31 -salicylate -##retic -intert -mucositis -synthesised -rewards -mma -carin -scant -definitely -colostr -aeds -##oping -aac -collisions -keywords -nlrp3 -kine -antiangiogenic -unfavourable -enucleation -football -##apent -myelodysplastic -##enzym -thickened -hyperrespons -fw -crashes -yersinia -glibenclamide -backscat -mrp -##anesulf -antineoplastic -obstructed -distinguishable -snack -focuss -##osaminidase -signed -vitiligo -htert -##her -288 -fertilizer -ubiquitination -submic -interim -complicating -inlet -acycl -deciding -##analy -##erry -301 -jia -putres -tbs -ancillary -eco -oxidants -##odystrophy -rca -subreg -goiter -bodily -forage -crystallized -delib -##osahex -##ulitis -ontogeny -chips -intracytoplasmic -dad -unint -extracranial -overgrowth -##fractionated -##arins -htr -##phae -epilep -adenoid -isolating -cole -##otency -unconscious -roch -##oeae -sprouting -endeav -ghrh -peroxisomal -263 -##rogenital -##09 -##rospor -##entrifug -bulim -nanotechnology -tir -acrosome -dapt -electroencephalogram -splanchnic -asparagine -asphyxia -acd -brs -nanostructured -gravis -##urinol -diphtheria -mania -bouts -holo -silenced -304 -302 -cavitation -##cam -manic -epilepticus -chic -sprint -parkinsonian -sternal -vertebra -glycosaminoglycans -exogenously -microbub -leaks -patella -isotherms -antisocial -reversing -dqb1 -##antine -propagated -##centration -rectangular -continent -comt -ventrolateral -corticotropin -misleading -ultrathin -maladaptive -scrotal -iiib -canis -hairy -phonon -heterochromatin -pbr -267 -##oconazole -≤0 -organisations -alignments -bioaccum -thalidomide -dz -immunosuppressed -salicylic -perchlor -army -streptavidin -awak -quarters -appearances -sesqu -satisfactorily -nonselective -inherently -##ochond -ferromagnetic -researcher -arithm -ql -predetermined -rhizosphere -pallid -degs -##jiang -optoelectronic -vulvar -posth -inquiry -nitrobenz -bde -h9 -qm -evs -arithmetic -dth -azt -##yrinth -biomechanics -anhydrase -tethered -dimethoxy -russia -##tida -officers -malfunction -microvessel -460 -amlodipine -##ofibrate -broilers -##odilution -ota -##tles -##anoid -apache -achr -fcm -polyphenol -spermatog -fingerprinting -##blas -hepc -classically -##rew -endosomal -##±5 -ginsen -coexpression -##lux -photovol -rio -proactive -steat -coculture -socs -reflections -t1dm -philosophy -##onella -##atp -fontan -lymphedema -##cet -paulo -hydroneph -vinc -##nl -bringing -shadow -rapd -##etit -cooled -##tonic -shield -520 -microvilli -nct00 -histochemistry -##vo -##agliptin -phc -##2s -papilloma -coliform -biomimetic -lepr -bism -##epo -esophagectomy -legionella -learners -ovariectomy -subsam -versatility -millis -acous -docosahex -contingent -colostrum -##iflor -anthocyan -##imidine -##ipsy -decisive -##orating -##etan -capita -disassemb -##week -chemopreventive -fpg -hydroxyd -normals -omn -chelator -instar -misoprost -##etium -subclasses -reoxygenation -##imeters -paced -##gamm -purple -distortions -avidin -profess -claudin -euthyroid -aliqu -shbg -synovitis -gonorrhoeae -switches -thyl -paco2 -##itim -glycoside -groin -antitrypsin -##aracter -276 -##rian -roux -thiobarbituric -misoprostol -amoeb -##ourinary -heteros -##oglitazone -ttx -logarithmic -cemented -palmitic -##nr -undern -##umenting -chen -##200 -sri -lc3 -enolase -intratr -menarche -administrated -smith -##lear -depolymer -macul -erythropoiesis -accompany -mineralized -fabp -commensal -cler -cytochalasin -unipolar -agenda -telangiect -binomial -sod1 -coagulopathy -pdr -ced -kw -stick -##ybden -myd88 -eralpha -lord -bystander -sinensis -modifiers -hiber -calm -##ptosis -flavin -provocation -fluorophore -desaturation -receives -mycophen -radix -immunoperoxidase -procollagen -thallium -fmlp -##anoic -sterols -262 -edu -gym -intracardiac -##tera -##grams -mounting -diis -crosslinked -##ymia -radiographically -ophthalmology -##isis -g6pd -##oba -arteritis -pioglitazone -postgraduate -hang -##apa -anopheles -evenly -hpc -urogenital -desulf -dextrose -##ortus -α1 -mtc -autograft -##ilia -peritoneum -hemipar -michigan -phent -arid -nkt -spleens -cooked -palpation -mcao -clathrin -ils -inclination -lecithin -astrocytoma -poliovirus -empiric -##elian -mares -ghana -##opoietin -##264 -amid -phytoplankton -gras -cse -vocabulary -genitourinary -phrenic -##obr -rhb -plotted -xyle -vocs -##jejun -ene -tough -monovalent -inspiration -##litazone -dj -##affinity -rotated -osmolar -fellow -lcs -aphid -enterococci -e4 -wallis -mpfc -nevi -documenting -postal -anterogr -tnt -radiologically -painless -swing -harbored -atypia -agenesis -baculovirus -satur -lvad -sulfamethoxazole -marsh -##riol -ffp -ema -greek -##igmentation -thyroglobulin -##fig -##ropylene -anoxia -pharmacies -cornerst -graphs -##ombic -hemi -femin -bmax -strabismus -naked -##itans -##formans -##obaric -tune -krus -vanadium -##icum -subth -topically -enterotoxin -fissure -acellular -##irm -##ohum -misclass -##agog -hallmarks -χ2 -professions -tendencies -entitled -##ecret -shoulders -##otrophs -##f5 -f0 -pret -##ui -maltose -##grass -transluminal -tails -supern -chorio -untrained -protamine -tolerable -##ofr -pseudoaneurysm -things -koh -nog -ltb4 -##plus -cofactors -neurotoxin -aic -##ermectin -dilutions -macros -glot -##oembryonic -fe2 -##ounds -cystine -fru -##epsy -hin -urate -clonogenic -synergism -sort -zwitter -muc1 -##iment -##othre -a4 -keratoplasty -avascular -necessitating -sip -rifampin -##lycaemic -achievements -pathologist -plethysm -wnv -monocular -reminis -virtue -##ocycline -##front -##tinine -spaw -accent -noncomp -lobular -widths -translocated -synech -southeastern -##orbable -acetylcysteine -zns -ppg -mustard -##triatal -coumarin -nucleosides -scholars -garlic -hyperuric -catalytically -soma -ruminal -citations -stories -lentiviral -##5y -272 -##akis -circumvent -nse -apps -fst -grossly -chlamydial -electrom -impressive -passively -unintended -myri -gangliosides -featuring -sbs -abduc -identifier -##azolin -ribonuclease -vagin -b5 -contracting -lifetimes -##legic -##ybdenum -impulsive -adaptor -##yramidal -symbol -pid -eicosan -ethi -##adiene -hybridoma -recruiting -##rier -##terna -harms -dura -reser -shang -tlrs -tce -intracoronary -matrigel -basket -arcuate -octyl -ethers -mpi -polymerases -czech -transwell -##osupp -protoplasts -##fected -intuss -secretin -##ylon -pyridin -pyre -resectable -##feld -312 -intracellularly -nonresponders -chaperones -##ovaginal -ikappab -yt -impingement -carcinoembryonic -japonica -wt1 -mastoid -taf -##gm -ecp -postsurgical -endocannab -helium -thermoreg -competitively -unidirectional -hts -desmin -##iden -hemagglutination -osteonecrosis -oculomotor -luminescent -io -taxon -infarcted -##umes -multinucle -tanz -erythro -capped -mobilized -whate -abundantly -inserts -390 -whatever -turbulence -myometrial -oligomerization -colliculus -myofibroblasts -##atrial -polysomnography -ild -glycosaminoglycan -ji -rac1 -averages -sarcolem -tell -postc -indo -lasik -unfamiliar -smoker -disrupts -remnants -##adin -bioinformatic -posttranslational -uganda -isi -transporting -postcon -isotopes -putrescine -aminopyr -cia -lia -genic -antrum -##energe -effluents -biotechnological -tween -sublingual -ctni -scaph -intuitive -acyclovir -neuroticism -abused -extraordinary -tier -##e2 -gist -ttr -recombin -harness -chemoradiation -cant -mlr -proves -metallothionein -##uloplasty -##acryl -chimeras -##evalu -aminopeptidase -ikk -cerc -ibm -functionalities -beer -icus -cpd -immobility -fluorodeoxyglucose -kinet -##rolactin -ctgf -anorectal -##cnt -additions -couplings -cloac -tle -neoformans -expands -hapten -hibern -teenagers -nests -endotox -asser -##extraction -brucei -##hibition -oligodeox -sirolimus -endorsed -levofloxacin -dul -intussus -taz -isoc -biogenic -conidia -opson -colors -methylphen -430 -##ett -exponent -hne -##exc -dus -precede -teratoma -##albumin -plt -calories -##urization -tailor -316 -electrophysiology -thromboplas -hypochlor -pce -tramadol -lav -illustrating -seminiferous -rye -summation -microcys -competit -fingerprints -inad -cardiotoxicity -proteobacteria -tuberculin -cili -greece -administrations -splenomegaly -sio -hypoperfusion -perist -galanin -palliation -peroneal -hydroxybutyrate -##androsterone -yang -r0 -reduct -nirs -mop -plank -##ectral -climbing -sensations -goes -##activator -prb -stocks -##ulectomy -tailoring -isoprenaline -##uese -hypertensives -##aterals -unplanned -stasis -hpt -electroencephalography -##uctured -srp -rud -alliance -radionuclides -orphan -nontoxic -bronchoconstr -deserves -heuristic -portuguese -underline -sorbent -compost -mdck -erosive -dum -##uren -endoscopically -fermi -nile -gonads -breakpoint -stau -abp -deciph -racem -insecurity -nighttime -incisional -impregnated -nfat -subsurface -manifests -analytically -aspirates -unavailable -hyperphosph -bred -stoma -261 -precedes -dmba -##roplas -lw -autoregulation -dlp -##vc -noisy -coagulase -##ectum -wetland -undefined -supplying -cd36 -multicomponent -mrc -shorten -cga -flask -##ospora -##unn -unacceptable -wp -##ogs -303 -hypersensitive -296 -varus -federation -##ohydro -spatio -worry -aggravated -chlorophenyl -vasomotor -sporulation -##atent -furan -pm10 -##pir -icv -gabap -cocktail -ond -470 -lca -488 -disagreement -tungst -occupying -synaptosomes -adrenocortic -##anglionic -afb1 -constituting -microinj -##apr -clearing -##orrected -utilised -thylak -neighbouring -nanod -polyploid -glenoid -stat1 -horizon -kal -satisfy -extrapolated -neurofibromatosis -benthic -anthracycline -incompatibility -ppa -ppc -sulfonate -cooperativity -intraoral -##edics -##ryst -equimolar -##overs -aquac -restores -##teat -hemagglutinin -enforcement -##perm -strata -avm -neighbourhood -lepid -##guanosine -oi -videotap -lateralis -ranitidine -controversies -indians -cd68 -dmn -hydride -##ocortin -bullous -spar -expose -flagellar -##idial -antiapoptotic -pufas -dag -npp -##ocereb -fats -##activities -intersection -mage -uncharacter -nonre -ctp -homic -appointments -lak -preformed -##otoler -blends -nscs -adjustable -normoxic -orexin -##ibrillar -##olium -b10 -##iliensis -cme -primordial -tritiated -amitr -##gravity -collaterals -witnessed -##avastatin -retinitis -c9 -races -alfal -youngest -sesquiterp -cataracts -archaeal -##inators -krebs -producer -aggl -##ocarpine -286 -tfp -##ups -gabapentin -ito -sway -ferroc -embryonal -##ibenz -narratives -tdp -##anteric -endogenously -##140 -minocycline -ntr -disconn -269 -##atran -endoscope -difluor -approximated -gallstones -protrusion -polyurethane -exciton -neurites -##olyticus -##rooms -6a -avidity -perforations -truth -remif -##ecretory -isos -periodicity -fastest -overlapped -vero -consultant -mock -hexa -semistr -sterility -clavul -server -277 -inhomogeneous -pons -femor -nsc -alfalfa -yoh -lycopene -creative -aap -xeno -##ophthalmitis -occurrences -steers -##tines -computers -revert -chi2 -guang -amelioration -visuospatial -depot -behcet -recommends -isoleucine -corneum -##ropion -ultracentrifug -##abp -##omalacia -##1c -##iances -##ectasis -dosimetric -cohesion -##imod -technetium -awakening -##amps -hydroxyphenyl -steroidogenesis -launched -fk506 -microem -linolenic -bost -perine -##kins -endang -dimorphism -maximizing -curett -prenatally -fluence -bromocr -semistructured -glute -nonsense -linc -##cha -clom -tourn -ocs -migrant -myelogenous -stabilizes -##asculature -hyperre -##omol -oxa -websites -540 -likert -##123 -neurobehavioral -amended -##uximab -giardia -defec -##etast -styl -##isole -nepal -ipl -##omimetic -chs -ivus -edentulous -cumulus -hyperresponsiveness -violet -##aya -##athion -infested -regressed -thalam -##acyclines -pef -cens -beings -parallels -##itations -ship -raters -posterolateral -infarctions -macrolide -vaginalis -entrapped -hypokal -b27 -##acental -revisions -bronchus -ethiopia -disadvantaged -ants -aqp -pink -##flower -turbidity -5ht -interchange -##kt -metic -##oglycer -fucose -indwelling -intoxic -tavi -292 -participatory -seated -musculature -subch -logarithm -amnesia -glucopyranosyl -oliv -occupations -##ylline -schwannoma -hepcidin -etan -landing -xylem -gat -peo -sgl -vats -molybdenum -unexposed -zeolite -drow -hyperglycemic -##aska -sonication -fenton -transepithelial -sublux -##izine -thromboplastin -##amellar -flocks -##ulsification -weakened -##ynes -taa -installed -clarification -remifentanil -drag -niches -pursued -cuticle -0004 -desicc -villus -spiroch -urology -subspecial -atherogenesis -osteopontin -lymphoblast -##eutical -##abilis -cotinine -pupils -malnourished -esterified -##arative -saved -kruskal -resistive -tensions -intravesical -##oquine -diverticulum -allopurinol -formally -calend -274 -payments -assemblages -scapular -haw -starved -##romedial -chc -dwell -##echoic -spars -conson -##osinus -vacuole -recession -knife -aom -catchment -tav -preimplantation -sales -occupy -foref -refrig -baical -wl -##yps -disposable -##ometrium -hart -conjunctivitis -##vised -digits -asking -deoxynucle -tobr -childbearing -##opathogenesis -jobs -companion -aeration -diterp -cements -##ulans -forceps -carcasses -hym -predictability -##tiana -gonadotrophin -tnp -loneliness -belgium -prs -intensification -iont -appendectomy -androstenedione -circumflex -fluoresc -flt3 -herbicides -sy5y -hypocalc -##rance -##ender -vitality -cotyled -undetected -ait -incubations -##rolactone -quinine -disintegr -mainstream -metach -lapse -harr -isov -longus -unemployment -reevalu -fluoroquinolones -ruminants -hyphae -mononucle -335 -sting -stall -validating -271 -haemophilia -defibrillation -protracted -neutralized -atrazine -guides -docosahexaenoic -watershed -cd5 -curettage -lq -puff -ketoconazole -##adhes -wheelchair -fistulae -##oxys -coronavirus -postpro -tod -northeastern -ubiquitously -##eptors -spas -nanometer -##testosterone -leaflets -##bed -necessitates -vastus -budes -299 -otc -##etector -resistances -fuels -pigeons -mtd -ctls -assembling -##thiaz -cpl -endophthalmitis -dominate -##operitoneum -cgy -##cls -##domains -quebec -tetras -detached -immunolab -print -invade -burned -##inarily -posttest -##ozoite -cholesteat -unpro -suddenly -##amilies -nights -##rogesterone -cheek -penta -diuresis -##low -extractable -invaded -inevitable -adt -agrob -intercon -reb -pharmacodynamics -rewarding -urch -999 -##ulant -basically -budesonide -pervasive -##ultures -cellularity -##ochromic -disintegration -phagocyt -dca -spc -spectro -berber -aggr -gingiv -coincidence -rpm -##benzene -thermodynamics -boston -echocardiogram -hyperemia -##interpre -amounted -erα -accelerometer -8th -rosig -pockets -smi -prrsv -spur -entorh -allied -sirt -retinoids -hpv16 -parsim -hydronephrosis -##olyl -condoms -##urant -##aphy -complicate -##relation -##ubic -ceased -reserves -##oconjugates -ssdna -merg -irid -##hydration -matters -hypertriglyceridemia -gills -oxldl -lesional -thiop -cellulase -hypoxanthine -intussusception -rgs -dissections -thymine -caught -myometrium -##hel -steroidogenic -lns -##ository -arthritic -##y1 -gapdh -pomc -rcs -erosions -rosiglitazone -taut -##nit -embr -promyel -bismuth -sorgh -trifluoromethyl -chondros -vat -congru -pasture -wetting -##elvic -stomatitis -uncondition -282 -aptt -c60 -hydroxych -lignoc -examiner -ambulance -premed -relay -##ointing -4s -##ospheres -mesial -foveal -chromaffin -nevus -sumo -osteoarth -parvum -bilingual -inactivate -coq -underestimation -appreciably -850 -##vales -##iptine -c5a -##epiandrosterone -guil -eoc -unsuitable -phb -##identate -##pora -gonadotrop -sealed -6h -accult -##adecanoyl -yes -decou -relieving -zea -##represented -ointment -legisl -triceps -ffr -pj -##ercept -adma -colistin -shade -landscapes -##iflex -##gae -inex -dissol -alga -grained -benefited -uneven -waking -##romas -##ulosic -conjunctiva -joined -384 -subretinal -hydrostatic -##onite -cytored -suvmax -##erian -depic -generalised -guard -thz -prospect -##imbine -ned -unpaired -##itarian -gsk3 -##ofovir -wort -apnoea -brightness -exemplified -penicillium -feeds -##with -excretory -striated -masseter -perfectly -##omac -mvpa -histogram -bronchopulmonary -intrathoracic -endop -multilocus -hyperglycaemia -hmw -##ounting -masticatory -pooling -apap -hydrogenation -houses -aetiological -detergents -estrone -fluoroscopic -filler -chlorp -##iptan -reclass -kim -spf -nonfatal -mammographic -classifiers -tegmental -tdi -dide -il6 -holter -photovolta -31p -immunisation -experimentation -phacoem -anticipation -propidium -enteritidis -bloc -automation -esterification -coryne -metamorphosis -unreliable -daf -gssg -trep -b19 -indium -phyll -entorhinal -nematic -runx2 -pilocarpine -instituted -afflic -asymmetries -lpo -helicase -restrained -mrd -aversion -gip -flowing -phentolamine -##acylglycer -employers -packages -genuine -thrombectomy -reex -wearable -maneuvers -bromocriptine -##ortin -arteriosus -hydrate -cefota -uncorrected -virologic -3m -##xs -stenotic -##plen -##operable -shs -432 -cytoprotective -##obox -mbs -ate -laccase -##urv -ovulatory -ntp -pco -stools -blm -psychophysical -elaboration -mpm -redes -masc -##arcin -stretched -cbl -345 -qc -##irs -executed -obliteration -lbw -vascularity -##atidic -trisphosphate -colic -purulent -accession -torsional -cgh -pso -impressions -marking -cimt -mbq -etanercept -rafts -oximetry -yohimbine -pristine -xenobiotics -analogy -zymosan -intraspecific -pyelonephritis -hepatobiliary -bisexual -12th -tobramycin -mud -palmar -leiomyoma -excitations -284 -soldiers -##inqu -##afe -nonne -eighth -underestimate -duk -3s -stroop -##bosacral -oviposition -polypropylene -ambiguity -oe -spectrin -tard -hyaline -endoderm -hematology -stacked -repositioning -preferably -##idases -paraquat -antipar -conserving -rns -alloc -##aclopr -reserpine -##bert -##ygb -ipscs -dup -sensitizing -latitude -obligatory -anthocyanins -patell -successively -279 -neuralgia -ergometer -anterograde -orr -boxes -lithotripsy -instruct -##omys -inadvert -pontine -privile -brucellosis -massachus -phacoemulsification -carbons -accepting -##angli -##opulation -##hand -dehydroepiandrosterone -##inae -complexities -catastroph -massachusetts -stereoselective -gallstone -underway -resistin -oac -##iliation -283 -mats -unim -anthocyanin -ewing -##ormone -loadings -##ixed -##zees -centromere -bites -chimpanzees -ctd -333 -cyp2e1 -nanocar -##ensor -mdp -hits -predisposed -rearranged -hamart -##urement -vaccinations -sacrifice -erroneous -centroid -foodborne -##pers -autofluorescence -brca2 -scenes -imprec -slurr -intercostal -##merc -methylphenidate -ambulation -triphosph -ccl2 -saponins -fps -unambiguously -ldlr -##azoles -educate -##k3 -##exia -doubly -cnp -hypoplastic -##145 -interdig -##dam -glucopyranoside -aniline -unreported -phs -satiety -bars -conformers -ensuing -ventromedial -epileptiform -insular -religion -angiograms -sdh -costimulatory -p24 -bisphosphonate -cvc -##gap -bak -quintile -tnbc -calcit -truncation -##alazine -periapical -colocalization -21st -dutp -proteus -passeng -treatable -needing -##endor -convales -intramuscularly -powers -##ostin -##yan -cah -intensely -femoris -peripherally -##ococcosis -neocortical -interfered -ladder -tcd -bicuc -mrl -##bian -regrowth -##robe -suturing -aip -elevate -mineralocortic -resides -##aluable -##entery -captures -##rological -deconv -decrement -refused -##rofacial -pads -chik -succeeded -l12 -meld -##utz -lumbosacral -sorghum -impacting -urethane -shanghai -ally -unple -behaved -aad -sealing -sox2 -consulted -r3 -actors -erb -catalysed -fronto -acromegaly -glabr -preclude -hydroperoxide -brassica -preventative -##uberculosis -meningeal -pixels -consangu -##otidyl -poag -precipitating -arsenite -##eke -##ivalence -ptv -##05 -organophosph -##ocaps -asm -desert -feedst -seaf -bathing -cefotaxime -equilibration -cdp -amphip -microalgae -fee -##obese -##plasm -diagonal -phototherapy -scholar -diverged -625 -checking -industrialized -##xin -fight -agro -294 -naprox -alleviating -bj -confident -myelination -fortified -discordance -towns -neighborhoods -512 -313 -unaware -##male -##mentum -ror -sta -##itize -staurospor -malabsorption -287 -##acetamide -tss -gluconeogenesis -##iffusion -vowel -cd15 -nomogram -bass -spectac -##alva -notification -##essel -courts -unpleasant -##aniline -mam -yoga -agrobacterium -claudication -##ogold -enox -desatur -squir -nonunion -gpcrs -opacity -##irradi -chlorpromazine -##aer -forelimb -judge -gravit -melanocytic -oophor -revis -deregulation -aster -##bon -financing -exams -premolars -syndromic -thrombocytopenic -admixture -fatalities -deformations -##igration -goss -photography -trnas -send -bronchiectasis -metatarsal -tuk -306 -bore -##anx -registers -##anciclovir -ssris -fusiform -elastography -##ohumeral -miu -aquaculture -##ograv -m4 -##oacetate -beta3 -##duoden -amphibian -diminishing -governments -##oglutarate -##opian -beetle -##oi -pneumophila -bicuculline -318 -nontrans -454 -##umatic -405 -phenoxy -297 -unins -##oken -intubated -ivs -##etence -atlant -papillae -##atiles -cdkn -289 -dissociative -metastable -husband -calling -disruptions -dsb -camb -perfring -parth -##ogel -recurring -phytohem -lighting -vinblas -milest -lined -##zn -interr -macroglobulin -##orientation -##oguan -insured -malocclusion -nothing -s4 -ssb -steam -fouling -##ralpha -##tigraphy -collar -oligodendrogl -diap -ration -fulfilling -micronucleus -##ymmetric -ert -circumc -wilms -contraindicated -promises -dithiothre -quenched -investigational -srt -centrifugal -triphenyl -hx -incubating -##actyly -mibg -chirality -cen -rrt -naproxen -##oria -##urans -hw -hyperlipid -hfs -adal -ultracentrifugation -musical -noninvas -##imol -intricate -##cellulose -hypogonadism -perennial -lateralization -polynomial -rads -##lr -##ruc -cardiover -triazole -paris -oxford -cachexia -3c -rear -tibialis -hscrp -dithiothreitol -281 -energetics -basophils -confused -hco -perpet -undertake -snoring -##127 -extractions -diagrams -frogs -gos -ofloxacin -hinge -dibutyryl -volatiles -##icola -hypercal -misdiagnosis -hemangiomas -purchase -disciplinary -##meas -sleeve -acetyls -gq -dissecting -f344 -dppc -postd -manometry -herni -estu -##gy -betaine -309 -micrographs -destabilization -sily -##06 -mpr -raft -bisphenol -immunoregulatory -aftern -unintention -##vt -unve -euthanized -laxity -##optysis -microstructural -repetitions -##105 -intestines -##roni -electroretin -organize -α2 -cas9 -constitutional -wga -cephalic -##tetr -extran -colocalized -immunochemical -##ropath -prolonging -##olecule -dialogue -corpora -cartilaginous -gct -submerged -isogenic -proliferated -carinii -erbb2 -calcitriol -stimulants -##worms -pollination -tribut -##odial -purpos -##crystalline -obvi -inte -expansions -angiogram -euthanasia -##ontally -duch -gloss -##onolactone -tagging -paranasal -reminiscent -keratoconus -antegr -alphab -hydroxylated -##yxin -concentrating -spans -accuracies -pemphigus -wss -raw264 -##ynchronization -##rimers -hete -##onto -nph -323 -adipogenesis -##co3 -arginase -triam -##jury -appliance -##hole -dressings -##ictal -colloids -advisory -avf -microsphere -##uities -##ofrontal -daltons -##urative -560 -underscores -hone -##athesis -rhd -##isations -lil -transglut -photocoagulation -breadth -enantiomeric -ablated -methox -supraventricular -decontamination -##asy -reticulocyte -h5n1 -dps -substantiated -tukey -biochem -enclosed -esp -apheresis -satisfying -vk -infinity -d5 -sulphur -sympos -neointimal -chromatid -thymocyte -tapping -repulsion -muco -invaluable -delphi -absolutely -silicate -355 -diminution -ergonomic -afternoon -smartphone -ropivacaine -tdt -valsalva -pests -##uries -dehp -##ophosphorylation -fullerene -normoxia -lewy -cucumber -aec -refolding -recalcitr -##urd -minorities -premalign -favoured -osseoin -architectural -microangi -brushing -gastrost -mayo -bsi -homeobox -vinblastine -##ophile -irreversibly -##tised -##iasm -neuroradi -gust -accountability -p70 -vp1 -##zolid -spouse -founder -tec -multiply -rape -rosette -gstm1 -provincial -syd -parane -pyogenes -cd86 -mumps -mig -guanidine -lentivirus -perfringens -abusers -435 -ppis -##holm -##ached -eac -retrotransp -unequal -gelation -##odality -arbor -motives -neurofilament -##idov -skf -pluripotency -serca -grasp -##ughters -derivatized -spermatids -##±6 -thaps -dmp -https -fors -hrql -invag -collagenous -##refring -##icon -vocational -reluct -3r -articulation -dimensionality -gallium -solubil -rhinosinus -rotary -##t4 -naphthyl -##spr -##tens -dihydroxyphenyl -assisting -bont -chondrogenic -##util -ceramics -unfractionated -governmental -birefring -307 -meteorological -trx -coarct -tadp -orofacial -##phalan -1969 -anterolateral -immigration -glassy -ers -tunis -precipitates -propylene -324 -removes -##olimus -dtc -multiform -##acetam -avenue -reint -##otetr -tonsillectomy -##atalysts -##iscal -##bm -impairing -carcinogenicity -##organized -preponder -convolution -variceal -workshops -earthquake -sunflower -updating -unity -cued -prematurely -epigastric -effected -pbdes -bags -anticipatory -dyspnoea -deriving -leachate -postures -mfs -chelate -pyloric -crustace -##ibi -antegrade -##carbonyl -diffusive -finished -neutrophilic -diminishes -tanzania -##utamide -incurred -sunitinib -metabotropic -##plasmic -precoci -cd95 -##hss -p0 -##ilol -##udinal -proving -ddd -cdr -simplify -phenotyping -operant -pyreth -gefitinib -labyrinth -ivermectin -oleate -cornerstone -irb -314 -reinst -hoped -iad -staurosporine -##glycine -aptamers -melphalan -t6 -insulator -##oub -byproduc -oviduct -ptr -##nut -sirs -aat -##onitor -pvl -##tilbene -expired -amides -##rocar -##ophilicity -weap -isokinetic -##ifn -nonobese -paradoxically -rhinosinusitis -ted -dls -proteas -##ilson -suicides -galnac -398 -dermatological -landfill -septicemia -citation -relaxations -conceal -thereof -disclose -psg -johnson -nonionic -confronted -1960s -##oserine -##pyruvate -carboxyp -thymoma -immunopos -##bling -328 -##me3 -nylon -##ventional -rpa -##4b -tbp -critic -pancreatectomy -voltages -##akic -ipd -mtr -338 -calendar -nestin -##aci -overuse -mycorrhizal -premise -cacl2 -firmly -##ilation -wrink -quinidine -anthel -17β -veterin -nucleoli -radiosensitivity -cuts -lk -endangered -hybridisation -attribution -##ogas -exhaust -##vular -##fb -dihydrotestosterone -dicty -typhi -perit -##occus -dock -##peri -##opard -pyros -ptt -mict -divor -spme -solit -##neumonia -sponges -jam -lir -mendelian -daughters -snf -transr -capec -guanylate -##ptics -polymerized -decompensated -enters -linezolid -##obal -##pps -hemolymph -cardioverter -interviewing -caa -salping -##oliation -##mscs -4e -bioaccumulation -##ohydrolase -##ecture -cyanobacterial -sternotomy -##imed -download -cannabinoids -##vp -assistant -##l4 -patern -etch -populated -bse -micronuclei -cytokinesis -subchondral -spinach -##omus -lmw -spotted -##bin -clinico -hypothesised -nulliparous -butyric -lobar -##obacillus -paroxetine -oryzae -cleave -neomycin -rutin -phenolics -egypt -##fd -336 -322 -##ophiles -pedestr -##ospores -aroma -##amptic -grief -##3c -##izyg -##kel -subfraction -colombia -pann -ctcs -noninvasively -##oda -hydroxysteroid -macula -##anthus -pht -326 -kyphosis -phytohemagglutinin -calb -sangu -##alact -assistants -photothermal -ffm -##ocally -malle -subsid -capecitabine -trkb -eradicate -victoria -proapoptotic -laminae -bsp -##road -##ometrics -12p -handicap -entom -##4002 -dhe -##adenoma -abstracted -##pervised -equatorial -pvs -fluorescently -worsen -percentiles -##aresis -circumscribed -continental -empy -mussels -thapsig -##yi -##occup -tritic -tcf -##uating -317 -improper -anecd -bursting -apatite -globus -skelet -labial -##atement -squirrel -broc -photop -sponsored -prk -nonlinearity -adalimumab -cetuximab -subtr -neurotensin -metallo -fumarate -quorum -mitogens -barely -conformal -grant -mesothelial -##din -6b -##imbic -bonfer -marc -misinterpre -##atentorial -numb -diox -##urational -intercept -panor -paraplegia -variances -looks -renewable -pgd -##160 -##ohepatitis -##adias -volv -microelectrode -##2p -reven -##ocerc -forehead -superconducting -cd40l -coryneb -apoc -cynomol -counteracted -dealt -widening -##vr -neuropathies -stan -##alp -ida -spiro -thuring -exotic -commenced -spared -##hyper -telec -##ocystein -everolimus -thapsigargin -tendin -abolish -rhabdomyosarcoma -##abric -fivefold -phobia -benzoic -nociception -##opexy -degenerated -lta -ym -indolent -hunger -quantitate -1968 -amitriptyline -inval -nonsyn -psychologists -endosperm -humanized -ascs -##cl3 -meanings -hairs -pws -hypotonic -jn -##omycetes -skewed -orthodon -hamm -##oprofen -osteopenia -aplastic -exome -leukopenia -zidov -neurotrophin -nicotiana -transrectal -pneumocystis -e2f -interferences -##eedback -handled -##ipes -resonator -multipl -lc50 -##fed -coadministration -anhydro -291 -walker -##peak -auricular -inoperable -mainland -oriental -##umer -prein -##ymb -ccp -therapeutical -##enzymatic -localities -hydroxybenz -draws -lysed -ecologically -myriad -##elen -mt1 -decoding -uncharacterized -##ymet -hypere -##otypical -hemin -hydroxyurea -##ucher -berberine -eud -##piper -examiners -peculiarities -microti -##estrus -##odeoxycholic -pepper -##asters -hash -autocor -repell -formalism -sids -denoted -##cyt -premalignant -ims -blade -scavengers -stx -meant -zidovudine -readout -mwcnts -lactobacilli -knew -tetradecanoyl -mated -trimer -dde -ucl -p5 -biomolecular -wilson -opposition -dendrimers -entries -1965 -electronically -##iliac -opaque -polyelectrolyte -##v2 -##ims -##openem -pex -organisational -itch -dipeptide -abutment -varicose -asi -dissect -preserves -intercalated -relaxant -advisable -gastroduoden -serologically -enhancements -342 -thermograv -texts -corroborate -##odular -agonistic -policym -reuse -coincides -provisional -enterpr -stepping -deflection -verruc -normalize -cardiov -entrain -polio -cyp2c19 -##oretinal -##rolases -##mates -##usen -really -harsh -duchenne -cd18 -ebola -gelatinase -##apentaenoic -antitum -relaxing -chx -fallopian -##opropane -advertising -entails -packaged -##c12 -pud -clipping -command -officinal -ft4 -frameshift -tour -sh2 -centuries -##uta -hccs -icf -drilling -##109 -gilts -##arming -topo -camkii -contag -nitrification -electroencephalographic -urge -bonferroni -##udied -##robacter -343 -deoxyuridine -nonoperative -piperacillin -##bands -trivial -##ucker -##oduodenectomy -##trich -##urban -planktonic -daun -##fetil -adipogenic -deferens -worn -explosive -tapered -voric -sialyl -intramural -cog -stereo -subjectively -spend -inertial -introg -photob -mpn -salience -mammal -agp -aspirate -##rozole -passes -hydroxydopamine -periplasmic -breasts -gis -eleph -timolol -##iones -alar -monooxygenase -pus -compaction -behaves -respects -aminoglycosides -xii -##q13 -demarc -specialised -##box -irradiance -premotor -kanamycin -gastrostomy -eminence -dech -provoke -hospitalised -faecium -##fh -aeromonas -tourniqu -##orters -cdk2 -flutter -teleost -cyp2c9 -dpat -microcapsules -fluorinated -luts -remitting -venules -tracks -fog -prebiotic -nonpolar -transplantations -pericardium -hyg -clefts -ptosis -demented -ept -##ariasis -hyperbil -##oplasmin -semis -insensitivity -stut -spoken -3t -protozoa -indonesia -##ophoresis -antinociception -progest -meropenem -returns -voriconazole -converge -louis -feno -cya -rbf -carved -bootstrap -interposition -dioxygenase -myositis -underm -depolarized -assure -pho -outperforms -reportedly -paraneoplastic -pyridoxal -methanolic -biofeedback -##onception -heard -lordosis -coarctation -patchy -sss -adrs -ahead -p63 -harris -papain -##isperse -##atalyst -steatohepatitis -adhesives -mica -enzymic -resembl -hads -cholesteatoma -opacities -##olumbar -lactamases -topographical -illuminated -onwards -pharyng -##ologically -nucleated -impede -daph -congo -neutropenic -ethanolic -425 -athymic -beetles -remind -perikary -protoporphyrin -cytochromes -amygdal -##ropical -pi3 -bronchoconstriction -hyperkal -ciliated -spirituality -plated -dpa -##yrrh -antigenicity -micronutrient -podocytes -mah -antagonize -feeder -puerto -microextraction -directive -532 -ileus -apcs -northwest -radiopharmaceu -elders -brow -perif -collagens -lecture -psm -photore -ntg -muscimol -##electrophoresis -dropout -modulations -oscillators -abbreviated -gramm -hamstring -pericarditis -pyrosequencing -js -caga -lanthanide -##orea -hai -triangle -phox -ecog -oab -##otyl -##yzed -flushing -transglutaminase -##oxime -abandoned -recombinants -antith -radiois -affiliation -##cross -##icularis -monolithic -uplc -##verbal -##170 -sfa -corticospinal -economics -haematopoietic -coincide -mary -intermed -acyltransferase -##amedullary -cards -rxr -alm -robustly -penetrance -reactants -terminate -crick -clc -pursue -##hemisph -cma -##obular -sectioning -npr -hostility -510 -radii -subthreshold -impulses -stabilities -pans -expenses -amplifier -amputations -undoub -##odiagn -##flies -##orelax -hva -minimizes -rk -##indin -superco -arisen -≥5 -shark -sanitation -##etallic -gloves -##galact -486 -jp -containment -interrater -##ovan -ly29 -##othiazide -dishes -hyposp -boc -##alesional -##ries -bitter -##obe -##oplanin -finds -anhydride -propri -discour -pgl -txb2 -macroscopically -congress -seeing -selenite -unsupervised -ias -jones -##odeoxyuridine -gtpases -shall -legitim -riva -attractiv -astrocytomas -chiropr -tachyarrhythm -interictal -expressive -##f6 -fabry -transmissible -sclerotherapy -municipalities -decorated -##held -ordinal -phenanthroline -amf -cmp -rooted -##otrig -univ -interrelated -nonb -cereals -translating -biologists -immunostained -nails -halide -pharynx -exercising -sero -microliter -##type -illusion -##pox -bioluminescence -heterotrophic -psychophys -flun -dcp -1800 -specifications -##ethylamine -succession -optimism -1p -decoc -cdt -silage -±0 -associating -microvasculature -metazo -kinetically -hydroxypropyl -framingham -minip -hemif -##aphic -chromatic -porcel -hmscs -punishment -impar -pallidus -vapour -troubles -emotionally -glaucomatous -bav -amr -laa -vntr -inconsistencies -aura -apposition -biphenyls -evar -##rocytic -inaccess -ppr -eos -nect -dph -n6 -##alignment -nanostructure -excellence -amblyopia -aglyc -##inavir -macronutr -tungsten -##abolism -dimorphic -kaw -euglyc -perfluoro -cyp1a2 -pumped -##aspinal -subluxation -490 -herbivores -6r -##uronate -##anediol -cecum -cran -hsp27 -angeles -calculus -periosteal -aha -uni -stimulations -cd80 -o1 -506 -stride -recess -catechin -##cf -paget -trophoblastic -ruptures -delinqu -##chard -erm -continually -textile -ablative -thuringiensis -vanadate -histograms -discol -laterality -aortas -mineralocorticoid -optimised -mers -fbp -##isd -##mers -seasonality -urologic -wetlands -undoubted -##980 -warts -osteolysis -logmar -lymphoblastoid -undertaking -turbulent -mullerian -##ulinic -enterocytes -tropomyosin -annotations -hypocalcemia -asexual -prun -magna -ido -378 -containers -pupillary -glycero -##ilicity -##loem -reconsider -secretase -dtt -flagella -transmitt -cook -standardize -##jo -##domain -angii -deciduous -cynomolgus -##hap -postis -780 -##eruleus -oncoprotein -procoagul -exchanged -tgfbeta -asynchronous -apn -taiwanese -gca -mycotoxins -sutured -alexith -genitalia -ifa -verteb -##encaps -britain -localizes -##holding -coi -mitigated -peroxisomes -fow -suffers -consultants -haptoglobin -localizing -traf -530 -lysosome -philipp -abb -southwest -pik -wellness -##wash -332 -antiferromagnetic -adiab -arrhythmic -summed -lump -pab -adhering -electrocardiography -clips -imidazol -immunoprecipitated -##wv -##olines -neuritis -sure -organochlor -agrees -##igenes -##enstrual -promyelocytic -##weigh -##emetic -ima -multipotent -pedunc -pugh -##cinated -dichloromethane -herp -ht2 -communicable -glabrata -bci -tsc -tumorigenicity -serosal -lamellae -sacro -##iosity -dmi -ecule -##osteron -##no3 -orienting -hemiparesis -gdf -notch1 -##emal -ross -tentative -zwitterionic -patellofemoral -dst -csd -##phia -lenti -uw -wool -sling -scanners -twisted -proct -shp -phosphoprotein -kcat -portugal -spark -roof -linkers -drained -##yclo -mycophenolate -gracil -hyperprolactin -podocyte -addictive -vep -distally -##uder -radiotr -intraductal -plantarum -informants -407 -##onitoring -vsd -uninsured -dbc -agr -attractiveness -dermatologists -##ontium -thoracolumbar -glomerulos -teamwork -haptic -fio2 -outperformed -##ivocally -unspecific -riton -metalloprote -bioavailable -sdb -ore -050 -elevating -indocyanine -ejaculation -wounding -##b6 -alu -##bachia -rgc -phloem -##104 -ferul -fluorophores -propionic -catfish -narrower -omitted -ensured -penins -diffusely -##rophied -refugees -##brom -precocious -ritonavir -microspor -burkitt -augmenting -iodinated -##maleimide -dormancy -vine -ncam -bvdv -books -mgmt -##yness -ili -happy -perir -dilemmas -bum -tams -permeabilized -enterocolitis -bragg -obstetrical -meconium -gossyp -##oprazole -recruits -symbolic -interferometer -dcc -319 -hamiltonian -brackets -##trauma -jewish -##ela -pneumatic -expiration -pulsatility -disproportionate -sscp -formulae -gynaecological -tep -vertex -pon1 -##silyl -zikv -rivarox -##cu -citalopram -coel -rams -coinfection -pallidum -xenon -6000 -ecori -cryptococcus -aorto -peel -reversion -tactic -hematomas -parvovirus -illegal -##grip -rivaroxaban -ptfe -transmitting -fears -proprioceptive -amenorrhea -aggrecan -##aortic -cmh2o -historic -founded -bombesin -metocl -estimators -##opramide -dorsiflex -trepon -cgp -lx -mapks -##hemispheric -adnexal -329 -1600 -vsv -naturalistic -##pping -cgs -binder -vhl -cleav -japonicum -multistep -flanked -baff -chase -systole -nitride -deformed -alike -empyema -teratogenic -extremes -aquaporin -llc -meters -prescribe -uvr -bcp -##alys -philadel -dispensing -glomerulosclerosis -snare -640 -potentiating -lpc -footprint -alend -histolytica -brands -retrieve -##ublish -formations -microgravity -wolbachia -coin -angiographically -decompensation -pyrazol -photosensitizer -anthelmin -antiemetic -hyperex -373 -pri -ellipso -##ungin -cgi -excimer -intercalation -temp -ssri -essay -immunodeficient -afterload -photophysical -pleura -urchin -mosm -paresis -pseudop -##encephaly -correlational -##terdam -purify -urbanization -dodec -titrated -327 -hepatocarcin -nfk -ingrowth -tlr9 -baboons -karyotypes -lifestyles -carvedilol -osteochondral -disulph -smad3 -imbalances -##jugated -constell -##tage -invasively -banks -postintervention -committees -ethidium -mpv -meningitidis -cdc42 -cephalosporin -##oparas -ree -mpc -neurologists -wmd -wines -##othal -##ophages -lipo -manifesting -handedness -stair -secondarily -potentiates -##ilane -##unate -psychoactive -scarcity -metoclopramide -##tris -eigenv -gingivitis -unloading -adduction -pfos -symposium -prokaryotes -involution -scab -jc -##q21 -2alpha -larva -mismatches -##trast -glycemia -##hemoglobin -fellowship -tenth -glyphos -philadelphia -##ublished -rgcs -isthmus -scintigraphic -osmolarity -theor -inserting -contraindication -bronchodilator -wasp -indica -southwestern -ganciclovir -niss -jumping -kern -sebaceous -premolar -anaesthe -##ucent -glyceraldehyde -ail -dabig -saccadic -mtbi -##elly -isl -720 -sulfo -p19 -##ansetron -icm -glyphosate -gallate -overestimation -gerb -plethora -##core -rigorously -resemblance -dabigatran -concise -whr -cardiologists -ionomer -630 -withdrawing -intelligent -##kyo -unequivocally -installation -tackle -radiative -lettuce -perforin -1g -##entious -epoch -toothbr -intrapartum -dans -plasmal -scatchard -burkholder -331 -##dt -regularity -plasmapheresis -omics -unbalanced -frontotemporal -321 -blu -tubing -##tier -refusal -multicentric -strontium -##ubular -omission -amplicon -s9 -crist -physiotherap -parasitism -recalcitrant -##agin -astr -pravastatin -coag -##ozin -ribozyme -millil -fep -myelosupp -ly294002 -asl -putida -##axine -spironolactone -##aeus -lactide -##onvulsive -vasculopathy -##furan -##evolution -resumption -polypharm -brev -updates -aqp4 -##±7 -hemopoietic -definitively -extrapyramidal -ducks -intimately -geniculate -lipogenesis -decomposed -##iser -declared -stagn -dism -proportionally -hoech -cannulated -oophorectomy -##aration -2k -uteri -procoagulant -underpin -transforms -cfr -deoxyribonucleic -##edrine -intraf -jm -ctnt -marmos -sls -acridine -postr -causation -fz -mtp -disasters -tmt -pericytes -##acylglycerols -surpass -##ertz -so4 -##ynchus -spinning -myoclonus -relaxin -bioequ -hans -##ophenone -##fract -retroviruses -avulsion -collapsed -dorsi -alendronate -##months -cars -hypoal -midwif -##olae -morbidly -disappointing -dermatologic -disassembly -enumeration -hig -idus -ondansetron -nls -##ycholate -glen -successes -valproic -researched -pharmacogenetic -511 -alkanes -director -cnvs -electrospun -multiforme -pcdd -clerks -monophasic -milling -##a4 -stew -##roblast -hypovol -pastoris -supercritical -electrophilic -ehrlich -litters -sba -hut -contingency -anodic -euros -stec -##bridge -lyophilized -##laf -hexokinase -pits -unpublished -##wr -##iasmatic -gathering -##stand -monophyletic -join -ectoderm -##terp -flaw -finishing -##rosthesis -##nh -mofs -##aud -psma -##wire -tokyo -acylation -351 -triamcin -dcr -##emoglobin -hypopnea -hyperventilation -##aglu -##idated -##waters -paraf -334 -##otrich -escs -hrct -multistage -bug -israeli -reforms -hoechst -5alpha -337 -melit -341 -ebna -spindles -##opolymers -##otyrosine -quinoline -austria -nitrox -hfp -phl -ninth -dcd -al2o3 -##icates -plr -prominently -amphibians -##chid -##obiology -pyrrole -##azolamide -immunogold -boiling -microsatellites -subfamilies -oak -##omyositis -augments -flattened -azido -narcol -c10 -jac -transfectants -##imited -photos -sensitis -ceus -regained -##oxifene -##efaciens -biologics -##ething -explosion -informing -##ocampal -noncompl -tdf -hiaa -igan -arachnoid -deregulated -nanocl -glycolipid -adeno -##afs -aus -##anoate -multinucleated -##oraph -approximations -ameliorating -something -motivations -hydrolysate -##tase -endobronchial -extents -pdf -acetylglucosamine -boar -l6 -txa2 -##perfusion -##ogranin -vesico -ppe -posttranscription -hypospadias -iκb -mitigating -diving -outweigh -##isic -carniv -##hipp -inat -penetrated -ests -perinuclear -nymphs -telangiectasia -##ferior -porcelain -travers -midwifery -unimp -##atemia -fauna -glut1 -cholangiopancre -##olo -amper -phosphoenol -##ayered -repulsive -award -chimerism -porphyrins -exudates -##parous -##iprazole -factory -##illar -amplicons -##nitros -meps -##ordinate -spermatocytes -dct -janus -recurs -flexural -phylogen -##roplasties -##ador -jer -dyslipid -intrarenal -buildings -ureteric -lend -##oplication -plexiform -1100 -multisp -etched -biopsied -headspace -sunsc -healthier -##roscope -##onous -##qc -##olac -paraph -cellulitis -deliberate -fluoroquinolone -storing -##othio -malarial -##osylcer -##7t -varicocele -maternally -##emes -interpolation -chemoprevention -bx -intergroup -ivm -galpha -bra -cdk4 -rhabdomyolysis -pedigrees -violations -transmittance -pgh -hydrops -foliar -mids -##uy -syrian -##tick -triamcinolone -quinolone -serp -wett -dio -lactams -citrulline -acculturation -arena -aspirated -chicago -660 -deoxyglucose -haematoma -reinn -mdi -simplest -412 -prf -scand -toronto -armed -exfoli -undoubtedly -##ascin -dinitroph -arthropod -##yelinated -dichotomous -endotoxemia -propanol -archival -nachrs -handgrip -contusion -ucb -vlbw -nigros -autoreactive -numeric -repairing -o6 -pmt -flt -circr -radiolabelled -pmp -complements -eot -##omatoid -noble -339 -##dlers -damping -mall -gingiva -##owa -anaphylactic -incorrectly -enteritis -neurosurge -phylum -pcps -##olinic -##imoto -immunodom -amniocentesis -caval -unintentional -visualizing -offices -microsp -panoramic -impurity -lda -p7 -thyrotoxic -gibbs -hnp -overr -≥50 -proposition -##othoracic -valent -funds -boards -##artite -pleasant -monozyg -densitometry -dme -##rotin -coerc -37°c -mcv -##almit -diastole -sweating -crus -irritability -##iced -immunol -##ogly -remissions -##tening -digitor -##itica -intratracheal -51cr -distractor -tdr -##omicroscopy -beside -5mg -overexpress -fishing -situational -mog -distinguishes -appendage -multiplexed -srebp -accelerator -##enesulf -dependencies -fragilis -monoclinic -confounded -dang -neurologically -invariance -stakeholder -mosaicism -tartrate -##metry -elapsed -##eptidyl -symbiosis -##hz -prolyl -laminectomy -consulting -crm -mgo -nucleosomes -autoc -##ethoxy -7a -adiabatic -vre -odors -succinyl -hyperhom -kilobase -##obenzyl -monomethyl -xylanase -##ibrin -removable -biting -overlaps -addicts -turp -multich -##aco -suppressors -reappear -deceleration -catalyzing -cauda -##5ac -dips -nj -##ager -appliances -scalar -desaturase -artemisinin -vec -##uloplasmin -perforator -kindling -364 -orchid -tetrahedral -enoxaparin -trac -corynebacterium -##attern -photovoltaic -electricity -diffusing -p15 -abca1 -oct4 -auc0 -ero -##qx -##ithi -19th -atpases -desiccation -##idinyl -mre -respondent -rehydration -conclusively -medulloblastoma -advocates -pme -biophys -infiltrative -stereois -settlement -dr4 -barth -gpc -mentors -spd -phenanthrene -##ablation -##craft -cryotherapy -≥10 -decarbox -scrapie -##trium -hyperuricemia -ata -mp2 -##empfer -##egm -orthologous -facultative -bom -precancerous -##cytidine -diamine -pao -repeatable -chimera -communicative -mnsod -preadip -milligr -figo -igg2a -n0 -bifidobacterium -evidently -##hf -dendrimer -leprae -digitorum -menten -saponin -mould -spawning -dpc -pentyl -##enk -chur -ensembles -neuroleptics -##osfamide -absor -##v6 -octahedral -##answ -pulmon -356 -opacification -##adders -##aglobulin -tourniquet -enema -ferred -forget -nonverbal -iva -352 -meticulous -abscisic -burkholderia -gii -influent -##pots -botan -348 -dihydrox -micrometast -digitized -rs22 -lips -pfo -igd -visitors -cd11c -lights -emf -##engine -ethm -chlorpyr -cisterna -##opyranoside -autoradiographic -perchlorate -synonym -##ercise -stereochemistry -##wall -dyslexia -excursion -tesla -autophosphorylation -##ipin -upar -resurf -sterilized -granulomatosis -µmol -eicosapentaenoic -##castle -##ecretion -structuring -##phys -bulbar -ketoac -sydney -mnc -extramedullary -rhu -tropism -hypophosph -clarifying -judgement -jord -puzz -diplopia -deoxynucleotidyl -lympho -##alogy -##6c -clothing -tph +version https://git-lfs.github.com/spec/v1 +oid sha256:5481b866c574669923e968846d3f12f414af4a9758ae273ec65ae3ec1c9550b1 +size 225097 diff --git a/gnorm_trained_models/BiomedNLP-PubMedBERT-base-uncased-abstract/version_vocab/vocab_ori.txt b/gnorm_trained_models/BiomedNLP-PubMedBERT-base-uncased-abstract/version_vocab/vocab_ori.txt index 9d65c8495e044c70ce1a30e2ae8e2f0b3738dbae..6cd26c45a7e8702fb90e0aa048b32b0040a7ff24 100644 --- a/gnorm_trained_models/BiomedNLP-PubMedBERT-base-uncased-abstract/version_vocab/vocab_ori.txt +++ b/gnorm_trained_models/BiomedNLP-PubMedBERT-base-uncased-abstract/version_vocab/vocab_ori.txt @@ -1,28895 +1,3 @@ -[PAD] -[UNK] -[CLS] -[SEP] -[MASK] -! -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -@ -[ -\ -] -^ -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -{ -| -} -~ -¡ -¢ -£ -¤ -¥ -¦ -§ -¨ -© -ª -« -¬ -® -¯ -° -± -² -³ -´ -µ -¶ -· -¸ -¹ -º -» -¼ -½ -¾ -¿ -× -ß -æ -ð -÷ -ø -þ -đ -ħ -ı -ĸ -ł -ŋ -œ -ƅ -ƈ -ƍ -ƒ -ƙ -ƛ -ƞ -ƭ -ƴ -ƶ -ƿ -ǀ -ǁ -ǂ -ǝ -ȣ -ȥ -ȵ -ȶ -ȼ -ɐ -ɑ -ɒ -ɓ -ɔ -ɕ -ɖ -ə -ɚ -ɛ -ɝ -ɡ -ɣ -ɤ -ɥ -ɨ -ɩ -ɪ -ɬ -ɭ -ɮ -ɯ -ɲ -ɳ -ɵ -ɷ -ɸ -ɹ -ɻ -ɾ -ɿ -ʀ -ʁ -ʂ -ʃ -ʅ -ʈ -ʉ -ʊ -ʋ -ʌ -ʎ -ʐ -ʑ -ʒ -ʔ -ʕ -ʘ -ʝ -ʟ -ʦ -ʧ -ʰ -ʱ -ʲ -ʷ -ʹ -ʺ -ʼ -ʾ -ˁ -˂ -˃ -˄ -ˆ -ˇ -ˉ -ː -ˑ -˖ -˘ -˙ -˚ -˜ -˝ -˞ -˟ -ˠ -ˤ -˪ -˭ -ˮ -˴ -ͳ -ͻ -΀ -΁ -΂ -΃ -΄ -΍ -α -β -γ -δ -ε -ζ -η -θ -ι -κ -λ -μ -ν -ξ -ο -π -ρ -ς -σ -τ -υ -φ -χ -ψ -ω -ϐ -ϑ -ϒ -ϕ -ϖ -ϝ -ϫ -ϭ -ϰ -ϱ -ϲ -ϵ -ϻ -ϼ -а -б -в -г -д -е -ж -з -и -к -л -м -н -о -п -р -с -т -у -ф -х -ц -ч -ш -щ -ь -э -ю -я -є -ѕ -і -ј -ћ -ѱ -ѳ -ѵ -ґ -қ -ҝ -ҡ -ҫ -ү -ұ -ҳ -һ -ӏ -ӕ -ә -ө -ӽ -ԏ -ԑ -՚ -־ -א -י -ץ -ר -ש -׳ -״ -، -ا -ة -ت -ح -خ -د -ر -ز -ش -ط -ع -ـ -ف -ل -م -و -٠ -١ -٢ -٤ -٪ -٭ -ۥ -ߚ -ߝ -ࣈ -क -च -ण -फ -र -ल -ा -० -ก -ข -ง -ต -ท -น -บ -พ -ฟ -ภ -ย -ร -ล -ว -ศ -ห -อ -ะ -า -฿ -แ -ใ -༌ -ခ -ᄀ -ᄁ -ᄂ -ᄃ -ᄄ -ᄅ -ᄆ -ᄇ -ᄉ -ᄋ -ᄌ -ᄎ -ᄏ -ᄐ -ᄑ -ᄒ -ᅟ -ᅡ -ᅢ -ᅣ -ᅥ -ᅦ -ᅧ -ᅨ -ᅩ -ᅪ -ᅬ -ᅭ -ᅮ -ᅯ -ᅰ -ᅱ -ᅲ -ᅳ -ᅴ -ᅵ -ᆨ -ᆩ -ᆪ -ᆫ -ᆭ -ᆯ -ᆲ -ᆷ -ᆸ -ᆺ -ᆻ -ᆼ -ᆾ -ᇀ -ᇂ -ᇞ -᛫ -ᴂ -ᴅ -ᴋ -ᴍ -ᴏ -ᴐ -ᴓ -ᴨ -ᴪ -ᴳ -ᴼ -ᵒ -ᵗ -ᵝ -ᵧ -ᵪ -ᵯ -ᵹ -ᶲ -᾽ -᾿ -῾ -‑ -‒ -— -― -‖ -‘ -’ -‚ -‛ -“ -” -„ -‟ -† -‡ -• -‥ -… -‧ -‰ -‱ -′ -″ -‴ -‹ -› -※ -‾ -‿ -⁁ -⁃ -⁄ -⁎ -⁓ -⁗ -⁰ -ⁱ -⁴ -⁵ -⁶ -⁷ -⁸ -⁹ -⁺ -⁻ -⁽ -⁾ -ⁿ -₀ -₁ -₂ -₃ -₄ -₅ -₆ -₇ -₈ -₉ -₋ -₌ -ₐ -ₓ -₣ -₤ -₦ -₩ -€ -₱ -₵ -₹ -₺ -ℂ -℃ -℅ -ℇ -ℋ -ℏ -ℐ -ℑ -ℒ -ℓ -ℕ -№ -℗ -ℙ -ℛ -ℜ -ℝ -℞ -℠ -™ -ℤ -℧ -ℬ -ℰ -ℱ -ℳ -ℴ -ℵ -ℽ -ⅅ -⅓ -⅔ -⅗ -⅙ -⅚ -⅛ -⅜ -ⅰ -ⅱ -ⅲ -ⅳ -ⅴ -ⅵ -ⅶ -ⅷ -ⅸ -ⅹ -ⅺ -ⅻ -← -↑ -→ -↓ -↔ -↕ -↗ -↘ -↙ -↝ -↦ -↷ -↼ -⇀ -⇄ -⇆ -⇋ -⇌ -⇐ -⇑ -⇒ -⇓ -⇔ -⇝ -⇨ -∀ -∂ -∅ -∆ -∇ -∈ -∊ -∋ -∎ -∏ -∐ -∑ -− -∓ -∕ -∖ -∗ -∘ -∙ -√ -∛ -∝ -∞ -∟ -∠ -∡ -∢ -∣ -∥ -∧ -∨ -∩ -∪ -∫ -∴ -∶ -∷ -∸ -∼ -∽ -∾ -≂ -≃ -≅ -≈ -≊ -≌ -≍ -≏ -≐ -≑ -≒ -≔ -≙ -≡ -≣ -≤ -≥ -≦ -≧ -≨ -≪ -≫ -≲ -≳ -≺ -≻ -≽ -≿ -⊂ -⊃ -⊆ -⊕ -⊖ -⊗ -⊘ -⊙ -⊞ -⊟ -⊠ -⊣ -⊤ -⊥ -⊿ -⋄ -⋅ -⋆ -⋊ -⋘ -⋙ -⋜ -⋝ -⋮ -⋯ -⌀ -⌈ -⌉ -⌊ -⌋ -⌜ -⌝ -⌢ -⌣ -⍴ -⍵ -⍺ -⎕ -⎼ -␣ -① -② -③ -④ -⑤ -⑥ -⑦ -⑧ -⑨ -⑩ -⑴ -⑵ -⑶ -ⓒ -ⓝ -ⓡ -─ -│ -├ -┤ -┬ -┴ -═ -║ -╪ -╳ -█ -░ -▒ -▓ -■ -□ -▪ -▫ -▬ -▯ -▲ -△ -▴ -▵ -▶ -▸ -▹ -► -▼ -▽ -▾ -▿ -◆ -◇ -◊ -○ -● -◦ -◻ -◽ -★ -☆ -☉ -☐ -☓ -☺ -♀ -♂ -♢ -♦ -♪ -♭ -♯ -✓ -✕ -✜ -✧ -✴ -✶ -➁ -➔ -➝ -➢ -➤ -⟂ -⟦ -⟧ -⟨ -⟩ -⟵ -⟶ -⦁ -⦵ -⧧ -⧸ -⧹ -⨉ -⨪ -⨯ -⩒ -⩼ -⩽ -⩾ -⩿ -⪅ -⪆ -⪕ -⪖ -⪝ -⪡ -⪢ -⫽ -⬄ -⬜ -⬡ -⬢ -⬰ -⬽ -ⱪ -⸱ -⿿ -、 -。 -〈 -〉 -《 -》 -「 -」 -【 -】 -〓 -〔 -〕 -〖 -〗 -〜 -〝 -〟 -ア -カ -ヒ -メ -リ -レ -・ -ㅣ -ㆍ -㎂ -㎍ -㎖ -㎛ -㎝ -㎟ -㎡ -㎶ -㒐 -丸 -参 -囊 -大 -射 -方 -气 -汤 -注 -消 -液 -清 -潜 -瘀 -益 -祛 -粒 -肝 -肾 -胶 -芪 -补 -颗 -饮 -骨 -꞉ -꞊ -ꞌ -ꞵ -ff -fi -fl -ffi -ffl -ſt -﴾ -﴿ -︰ -﹒ -﹛ -﹢ -﹣ -﹤ -﹥ -﹩ -$ -% -& -' -( -) -* -+ -, -- -. -: -; -< -= -> -? -[ -] -^ -_ -c -i -m -p -v -{ -| -} -~ -、 -・ -ア -オ -タ -モ -ᅲ -£ -¬ -¥ -₩ - -𝐑 -𝐟 -𝐫 -𝐴 -𝐸 -𝑃 -𝑐 -𝑑 -𝑒 -𝑓 -𝑖 -𝑛 -𝑜 -𝑟 -𝑡 -𝑥 -𝒆 -𝒙 -𝒞 -𝒟 -𝒦 -𝒩 -𝒪 -𝒫 -𝒮 -𝒯 -𝓟 -𝓣 -𝔇 -𝔐 -𝔹 -𝕊 -𝕜 -𝖱 -𝘗 -𝛂 -𝛆 -𝛼 -𝛽 -𝜀 -𝜃 -𝜅 -𝜇 -𝜋 -𝜌 -𝜒 -𝜖 -𝜗 -𝝁 -##y -##q -##e -##0 -##5 -##3 -##8 -##a -##g -##p -##c -##t -##4 -##6 -##7 -##2 -##h -##d -##i -##n -##o -##s -##r -##w -##u -##k -##b -##f -##l -##m -##z -##v -##1 -##9 -##⋮ -##° -##x -##j -##≈ -##₃ -##∙ -##λ -##⁻ -##μ -##ø -##∼ -##→ -##δ -##× -##™ -##∕ -##≫ -##β -##± -##₂ -##₆ -##⩽ -##€ -##® -##∓ -##α -##с -##γ -##£ -##≡ -##⋯ -##π -##═ -##⋆ -##ρ -##µ -##σ -##₅ -##₄ -##˚ -##ᅴ -##ᄇ -##ᅡ -##ᆼ -##ᄋ -##ᅲ -##ᄌ -##ᅱ -##₋ -##fi -##´ -##₇ -##ε -##ß -##+ -##η -##© -##б -##³ -##е -##ı -##□ -##∗ -##∶ -##⁄ -##− -##ϕ -##⊂ -##˙ -##º -##∆ -##↓ -##κ -##∞ -##⁺ -##₈ -##fl -##ι -##⁸ -##∷ -##⋅ -##² -##⁰ -##æ -##↔ -##₁ -##ζ -##τ -##÷ -##= -##∘ -##¹ -##⁷ -##⁶ -##χ -##ω -##ϒ -##ℏ -##ˆ -##ф -##о -##↑ -##⩾ -##φ -##¬ -##ϵ -##υ -##₉ -##θ -##ψ -##─ -##├ -##≪ -##ł -##♂ -##ℓ -##➔ -##ⅱ -##⁴ -##≳ -##ν -##~ -##√ -##≲ -##< -##к -##⁹ -##⊃ -##н -##∝ -##₀ -##⁵ -##¯ -##← -##ff -##΄ -##а -##ə -##◦ -##≃ -##≅ -##œ -##ɛ -##⧹ -##⍺ -##ⅰ -##ɑ -##ο -##⇒ -##♀ -##˂ -##○ -##▴ -##> -##⊗ -##р -##в -##ж -##℃ -##∈ -##∧ -##⊙ -##¢ -##м -##ᅭ -##ᅵ -##ᆫ -##ᅦ -##∑ -##у -##з -##л -##і -##≦ -##≧ -##ˮ -##≒ -##﹥ -##≊ -##¼ -##△ -##ᄅ -##ᅩ -##ᅥ -##ᆨ -##đ -##⊥ -##т -##ⅳ -##˃ -##ƒ -##ξ -##│ -##¾ -##ⅲ -##ð -##ϲ -##ⓒ -##␣ -##┴ -##∥ -##▪ -##⧧ -##𝛽 -##➝ -##ɣ -##∇ -##ш -##ᶲ -##и -##г -##♦ -##∫ -##∩ -##½ -##ſt -##⇋ -##ⅷ -##˜ -##⧸ -##¨ -##⨯ -##∣ -##ª -##∨ -##ĸ -##● -##▬ -##¥ -##ⅴ -##⇑ -##⇓ -##⇆ -##⬢ -##∏ -##ɕ -##℅ -##х -##⇌ -##ː -##∪ -##⊕ -##ǀ -##˖ -##ɒ -##⬜ -##ᆯ -##ᆸ -##д -##𝐑 -##⇀ -##˝ -##≐ -##þ -##↷ -##ɹ -##ʃ -##ƞ -##⟶ -##⇄ -##┤ -##ц -##я -##ߚ -##¸ -##п -##∴ -##⇔ -##№ -##ʱ -##↦ -##ͳ -##⪅ -##▵ -##⇝ -##ɚ -##ᆺ -##ɐ -##ɪ -##② -##ҡ -##⊠ -##∂ -##ꞌ -##≍ -##ˇ -##▒ -##ю -##☓ -##∖ -##ˉ -##┬ -##⎼ -##ₓ -##ς -##〓 -##¦ -##ᵒ -##⋘ -##ⁱ -##ᆷ -##ᅳ -##⅓ -##㎍ -##■ -##♯ -##΀ -##≿ -##ү -##ᄉ -##ϭ -##˞ -##ɸ -##∅ -##⪢ -##↝ -##ᴂ -##𝛆 -##ᵝ -##˄ -##ᵧ -##ᅪ -##ᄒ -##ᄀ -##ᅧ -##ᄃ -##∊ -##ᅟ -##¤ -##☉ -##ʰ -##ⓡ -##ᄆ -##ϐ -##ר -##א -##ש -##י -##| -##▓ -##ч -##ь -##ᄑ -##ᆩ -##ᄂ -##ˑ -##⊖ -##⦁ -##ℑ -##ℇ -##ⅺ -##⋜ -##᾿ -##ᅢ -##¥ -##║ -##є -##ₐ -##ᅮ -##ʼ -##қ -##ҫ -##ฟ -##น -##𝒯 -##⊟ -##฿ -##ɔ -##ʁ -##⋙ -##ʌ -##ᅬ -##ⅶ -##⪡ -##ϰ -##ᆾ -##⊘ -##ـ -##ᇞ -##▸ -##✕ -##⋄ -##ℤ -##ㅣ -##ffi -##↼ -##อ -##ง -##ᵗ -##ⅸ -##ɖ -##ffl -##ʊ -##⿿ -##╳ -##﹢ -##΍ -##▿ -##ˁ -##˭ -##≺ -##₣ -##ϖ -##↕ -##⬰ -##ᄏ -##⬡ -##⪆ -##↘ -##ᅰ -##₵ -##า -##ใ -##ต -##บ -##⊿ -##𝒞 -##ℒ -##ɳ -##ѱ -##ل -##ƛ -##ϱ -##⨉ -##𝑖 -##𝑓 -##𝑒 -##𝑟 -##𝑛 -##𝑡 -##𝑜 -##ว -##⋝ -##₌ -##ᄐ -##ℝ -##ŋ -##ᆻ -##ก -##∽ -##ѕ -##ӏ -##٢ -##᾽ -##ħ -##⬽ -##ɡ -##ㆍ -##^ -##ɩ -##ᇂ -##℧ -##ⅻ -##ǁ -##ɷ -##ͻ -##⌀ -##𝑐 -##ѵ -##ɤ -##㎛ -##˘ -##ʒ -##꞊ -##ा -##क -##𝜀 -##ᴏ -##ⅵ -##¬ -##タ -##モ -##ℱ -##ᴼ -##ア -##ʲ -##ј -##د -##م -##ة -##♭ -##ᴋ -##΃ -##₺ -##℗ -##꞉ -##ʹ -##ᴐ -##ز -##ع -##ر -##و -##ᆭ -##↗ -##﹤ -##ᅨ -##ƴ -##𝑑 -##メ -##リ -##カ -##≽ -##≣ -##ʾ -##ᄎ -##ᅯ -##΁ -##⊆ -##ƙ -##ʂ -##ℕ -##ش -##ا -##ف -##⟂ -##≏ -##ᆲ -##ᆪ -##ʉ -##オ -##℠ -##m -##ƭ -##ल -##⊣ -##ˤ -##ᄄ -##һ -##∀ -##ᅣ -##░ -##ɾ -##∟ -##ꞵ -##≔ -##١ -##ت -##✓ -##ʘ -##▼ -##ɮ -##i -##v -##ʐ -##٠ -##≑ -##₦ -##ヒ -##ǂ -##𝜃 -##℞ -##⌢ -##ϑ -##ߝ -##ɵ -##⩿ -##ȥ -##ⱪ -##⅔ -##ȼ -##レ -##ⁿ -##˴ -##𝒆 -##♪ -##☆ -##⨪ -##𝓣 -##█ -##٤ -##⩒ -##ɬ -##ƈ -##ᴍ -##⌣ -##𝜅 -##ʷ -##③ -##≤ -##ћ -##∠ -##э -##ѳ -##p -##ℳ -##◊ -##र -##ण -##𝐟 -##∎ -##⇐ -##ʎ -##☺ -##⟵ -##ℜ -##㎖ -##ˠ -##แ -##ล -##ะ -##ข -##ภ -##พ -##ย -##ร -##ท -##ศ -##ʺ -##ⅹ -##⫽ -##① -##∋ -##ۥ -##ℂ -##≻ -##ᴪ -##ǝ -##ᄁ -##΂ -##ȣ -##𝜋 -##ɓ -##ɯ -##ᴓ -##ө -##⑩ -##ℵ -##₩ -##★ -##𝛼 -##ᇀ -##④ -##ti -th -##er -##on -##en -##es -##ed -##in -the -##al -in -an -##or -of -##at -##an -##ro -##tion -and -##as -##it -##ic -##ar -##is -##ent -##ec -##re -##el -##ing -to -##ation -##ul -##et -##ol -##om -##ac -##ur -##os -##ith -##us -##ve -##id -##ati -with -##ly -##le -con -st -##th -##ere -##ter -##ig -pro -for -re -##ts -##uc -##od -##il -##em -as -##if -was -##ated -##un -##ess -##am -##ce -##im -##tr -##ow -ex -is -res -##um -##oc -were -##op -##ut -##tic -##ity -##ir -##ion -on -##ents -ac -com -##ate -that -##ab -##ot -##og -al -be -##ys -ch -##ud -##ev -##ag -##ell -by -##ad -##ain -##ap -##rom -wh -##ical -##ect -##ant -or -pr -##ers -##ib -su -##pl -##ine -##ment -pati -##ase -##iv -##ep -im -##tin -tr -##ff -dis -at -##igh -ad -us -we -##tiv -inc -this -##ph -comp -patients -##ence -are -##qu -sp -en -from -##ure -cell -stud -##tive -##iz -##ay -##ign -##ial -sh -un -##ific -ne -ind -##te -met -##ies -##ch -##ations -per -##fer -##per -##ress -rec -##ous -##ort -##ip -##orm -##ans -ass -##ens -ph -eff -##ear -##evel -##ore -resul -##rou -##ge -##ons -##oun -cl -##ia -##ely -gen -##est -ap -##act -##ting -sign -##bs -##enti -inf -##ular -##ary -##ition -##yp -dif -##oth -##olog -par -bet -##erm -high -pre -inter -##ese -rel -##ased -can -##cl -cont -tre -##ver -not -pl -results -ha -##ors -co -de -##ding -imp -##yl -these -meth -ev -study -##ym -resp -car -exp -##ich -pos -##roup -##ist -me -##st -##der -differ -str -##enc -trans -##ance -##ose -incre -##ative -di -group -signific -##ath -am -##een -##duc -anal -##ection -sur -##ied -significant -cells -reg -mod -##and -##ast -##ong -##ual -det -af -rep -spec -pres -##iti -cons -activ -all -method -which -##ween -##able -have -##age -between -sc -inv -show -##vi -##rol -mic -##all -##ory -##00 -##oci -ab -int -##end -##ide -after -ar -##ression -##oll -##ound -cor -##ater -treat -##atin -he -##ros -than -no -##ach -##ved -##usion -##ug -compar -##ects -tw -##ally -##ari -##ected -##tein -##one -se -##alu -##yst -##ther -level -anti -##ight -##yn -##ates -##unc -med -app -ag -##ever -##ases -##ech -##cr -associ -##ost -##ive -treatment -##ak -##ective -##our -using -##form -it -##tim -le -##ility -bl -##ome -but -##ox -has -been -protein -##entr -two -##unction -des -dur -##ted -##na -control -analys -##ulation -el -ob -##ood -##ized -bi -##ating -##ard -##evelop -##ue -more -low -dec -develop -hy -may -##mun -##ang -##anc -both -##osis -##dition -##so -mon -sub -clin -##ree -ca -year -ser -##action -dise -10 -dat -fl -their -red -##ass -##yt -##hib -##cess -also -tim -dep -##gg -##pt -##ill -##ob -##acter -##ri -##ures -syst -used -concl -##ub -##cer -##uced -during -##ectiv -there -id -##ined -##ological -##erap -had -##agn -##iss -function -##ism -fact -meas -dr -##uld -system -respons -##ogen -man -##ew -vari -gl -clinical -obs -data -em -##br -##out -suc -incl -##gen -##ond -heal -##ution -##atic -es -how -methods -out -associated -##pp -under -sim -##urr -present -form -##plic -##ack -analysis -sugg -sam -over -acc -##aph -##etic -##ium -one -model -conclusion -includ -ris -foll -health -typ -##uct -##ult -increased -produc -use -##inding -activity -compared -significantly -pol -##ays -disease -non -conc -up -syn -##ained -inhib -expression -levels -ins -obser -##gr -##ven -other -end -found -##ace -##ole -##ization -follow -time -##ici -qu -##entif -##ild -oc -##eth -##ite -##ential -perform -##itive -immun -20 -##ogr -##und -risk -fr -exper -##tig -##ane -dem -evalu -gr -determ -cr -##ants -pot -##ail -different -studies -effect -br -hum -identif -effects -showed -##ments -indic -##ile -its -based -exam -##atory -##ish -appro -##tal -prov -##row -##estig -therap -investig -char -partic -concentr -##oid -##als -however -tum -sm -path -ma -report -min -sel -##mon -##ery -##ities -most -##ences -##adi -specific -12 -infl -##ectively -diagn -##onstr -##ma -assess -##ple -contr -rem -##esis -##ency -ep -when -##ination -our -years -##av -subs -##idence -##etr -##tern -def -op -mul -supp -##ormal -acid -age -##ren -into -decre -##ability -only -##ange -well -higher -intr -##ograph -##tical -##oph -three -19 -##erg -##ork -cancer -##iqu -##ently -test -##ions -##ited -peri -##ology -##ber -##inal -pred -gener -mal -human -##echan -##ept -induced -##ature -##arg -##ement -such -##af -mol -new -##ible -impro -observed -character -import -num -sec -struct -mechan -##til -##ord -groups -##ix -##ik -patient -##ov -##ugh -##rel -fir -sens -demonstr -child -##rop -occ -col -##res -ox -chang -loc -##tid -##ron -##gh -rate -##aining -blood -kn -##lex -cases -##ute -post -related -##use -surg -process -##cop -##ark -tiss -micro -##ok -##ured -term -rat -who -##tit -fe -first -##view -##oma -fur -suggest -valu -aff -##uction -gene -response -mem -disc -addition -##amm -##echn -##ural -cal -po -##rough -neu -##ough -##dro -type -##ten -##osph -inj -those -grow -##och -development -##osed -##own -techn -##round -##ior -among -sol -##ful -respectively -##its -##ulated -condition -conf -##ines -mean -care -cd -##vention -##ental -molec -recept -##ucle -15 -##ital -##les -month -drug -factors -del -through -mg -##ains -##arly -pop -total -hyp -ps -ol -##omen -changes -il -##fore -increase -prim -cyt -bas -##ize -correl -role -invol -00 -potential -##di -##ivid -obj -##endent -prop -long -prob -experim -##ould -phys -##ational -##asc -##tained -plas -subj -##xim -fre -normal -gre -##crib -##urs -finding -my -##earch -medi -mus -important -##ived -stim -back -adm -##eng -lower -main -##vers -predic -##roph -##omy -could -simil -##led -mechanism -11 -##over -##istic -##ha -ii -##ex -##ek -within -sym -hist -##ds -##cin -performed -##gan -infection -so -##air -where -##eter -##ody -sequ -number -##ene -comb -frequ -##osp -lik -extr -sever -hyper -therapy -95 -review -##ms -conclusions -prog -##ption -##ps -outc -less -coll -##ider -while -multi -##ade -flu -##cle -mm -cap -each -##ether -children -requ -antib -hydro -bec -14 -treated -reported -##iver -lim -including -##val -similar -##ages -period -poss -vir -growth -tumor -do -fib -some -complex -aim -##orph -ext -##ores -##ock -women -##vious -##omes -positive -##arge -radi -pur -describ -pat -reve -##ne -200 -13 -dna -findings -##ric -##otic -cy -chem -factor -##ptom -prom -did -without -##ival -ret -30 -inhibit -##pha -week -mo -cardi -##ividual -mut -work -##ples -prot -##ground -tem -er -case -##zym -four -##iel -abs -##tivity -mice -they -##ily -individual -##ues -consider -dir -background -25 -##istr -##abol -tissue -calc -18 -days -approach -pe -av -##ascular -##ativ -surf -##ness -months -previous -vit -##ise -need -curr -##ale -##otyp -isol -##oper -phosph -identified -common -sing -measure -##ically -reveal -##ty -hem -maj -##ake -##iter -sev -major -vis -##oh -##ross -beha -synth -techniqu -##equ -organ -nucle -design -caus -##fl -##are -presence -research -##plet -behavi -analy -second -provid -mark -##ality -differences -##ocyt -##med -##ified -further -##uss -surv -mater -vol -symptom -reduced -##atal -small -16 -metabol -24 -obtained -##ced -binding -hosp -class -serum -dose -##ning -influ -##plications -rats -##atis -##crip -cult -mat -##par -initi -evidence -##ymph -due -single -early -##ography -##ice -und -sk -day -lip -expl -inform -admin -##acy -about -species -##ety -##lish -##ung -##ties -enzym -##ft -decreased -sl -##ately -conditions -05 -receptor -##itis -bre -primary -alth -##ension -lymph -50 -although -os -subjects -##hip -##esting -##ogn -qual -diagnosis -enh -lab -ele -measured -##ites -concentration -##old -##ink -ml -rati -particip -##trib -##int -arter -##ins -ci -##ressed -proteins -studied -neg -##roscop -concentrations -fem -population -##ectr -##oles -##las -pers -##ither -##ests -resist -improve -100 -alpha -mr -lif -rece -hep -should -beta -##ication -range -##ateg -brain -pattern -genes -developed -investigated -##ials -##ych -objective -samples -##ength -##gf -plasma -line -press -lead -set -phen -membr -if -ec -following -##erv -interaction -revealed -##vir -##emia -##erc -##amic -aut -##aging -reaction -demonstrated -##arget -17 -der -discuss -analyz -coun -dependent -target -##plant -anim -surgery -ang -prol -##ening -##onal -vs -several -chron -surface -##eptid -##ulin -est -fam -relations -##hy -inflamm -##tan -##por -##osure -oper -##ind -gluc -##ield -##aneous -large -proper -molecular -hospital -stand -##roduc -elect -kg -adv -##ick -les -here -examined -##asing -complet -##cent -##oss -survival -appear -weight -prof -functional -chrom -ul -effective -hear -##ensity -evaluated -requir -information -liver -unders -proced -values -quan -deg -again -##tered -##az -prac -local -201 -phase -estim -polym -stress -##cs -electr -##tis -##ax -determined -region -##tions -val -standard -diff -ir -neur -##ire -bone -param -relationship -nec -whether -rates -01 -determine -acute -##ause -chronic -mass -body -free -cle -##arm -pain -sch -sus -diab -enhanc -water -occurr -either -equ -before -pul -life -##ving -intervention -bacter -will -001 -characteristic -pressure -conduc -##onic -gu -##omic -included -greater -ur -quality -##ian -same -various -##yr -##ivers -##icient -reduction -pathw -activation -structure -##uth -provide -injur -dim -##sp -thus -##viron -support -against -many -##plication -##ator -##ulations -manag -hel -ro -ratio -##ocytes -##orb -contrib -ren -shown -##lement -cir -multiple -vitro -nov -##ification -##iven -signal -##tain -exposure -##amin -symptoms -known -gly -current -##atively -direct -##operative -sep -medical -dom -##ules -purp -psych -prec -exc -nan -whereas -vi -negative -genetic -production -because -distrib -carb -##andom -##eters -##ission -ach -fail -flow -formation -21 -fac -##eu -sal -environ -##rome -detected -##amine -adj -random -compound -##ostic -##ellular -rele -40 -strong -##gram -##ensive -##tric -##ateral -##uted -recom -##ists -##ometr -possible -pub -transcrip -mort -##itivity -##ude -association -loss -part -nm -##up -##emic -pap -any -##cept -reduc -compon -##ae -##ct -ms -##icity -change -repres -##ocy -##ynamic -##nal -controls -membrane -success -order -##eration -confir -sex -involved -muc -weeks -cent -##otherap -six -performance -muscle -responses -avail -general -size -##atus -mechanisms -##tif -difference -oxid -##gn -commun -mortality -particular -lo -virus -##ablish -temper -##ient -properties -tox -hypoth -indep -five -establish -strateg -assessed -cycl -models -experi -lung -statis -consist -##ann -resid -##uk -synd -differenti -##elial -##istration -state -increasing -overall -##atures -behavior -##ision -carcin -purpose -identify -men -000 -##ochem -22 -sin -old -60 -##ward -novel -management -##olution -contin -cm -regul -##aw -heart -##aps -##ised -super -diet -poly -presented -being -analyzed -proble -amin -##inant -scre -rap -mac -##ologic -##eral -neuro -magn -ct -surgical -link -##estion -contrast -##eutic -##app -area -like -self -imaging -##opath -death -phot -foc -##ording -intra -frequency -viv -environment -ov -##acc -eth -##iciency -##iving -evaluate -volum -allow -##reg -##itor -active -dys -outcomes -##ie -circ -preval -##dr -##active -result -recent -therefore -##otherapy -parameters -severe -outcome -##eric -centr -peptid -stage -value -vivo -very -isolated -##elet -add -##ographic -tra -distribution -##ergy -short -indicate -technique -decrease -##ust -mit -enzyme -##oses -resistance -##odies -##etes -enc -##verage -##lor -detection -participants -morph -nit -index -sw -injury -experimental -renal -optim -##tinal -##orts -systems -male -hom -induc -23 -administration -block -site -cross -previously -##ides -inhibition -stimul -accur -sensitivity -vers -diseases -##icular -ve -##ogenesis -dev -characteristics -##ended -healthy -##osition -##ably -left -tested -syndrome -##tex -##amp -described -prepar -exhib -gas -assay -therapeutic -pregn -rapid -##ocial -bene -##ule -att -highly -##asis -considered -animals -##rh -##tegr -glucose -##ancy -##iat -##ull -##oplas -artic -fat -mediated -investigate -maxim -rh -benef -fil -correlation -incidence -##inc -followed -atten -available -containing -199 -##ours -##eding -family -##iological -##ns -lesions -cere -sequence -then -##tically -##de -status -##ode -saf -##ylation -tub -##ances -better -##ices -##ogenic -types -kin -sample -da -##ids -##ouse -##ters -improved -##ially -##of -prolif -##red -hiv -relative -##roscopy -##inary -caused -##olic -##opt -impact -28 -##ops -##ount -derived -##ner -individuals -##ocard -energy -breast -##ulf -##ually -##ands -surve -nor -required -##itation -her -effic -##ces -via -inflammatory -##otype -##verse -insulin -cause -pharm -te -release -solution -##els -integr -temperature -##iev -cardiac -field -##plied -hypert -##ygen -pc -##ey -##acellular -adh -secre -percent -net -amino -record -##terior -bel -ultr -35 -sites -ability -dist -poor -spect -mrna -##omp -expressed -influence -approxim -combination -assessment -produced -analyses -conducted -physical -prevalence -light -##arc -##eline -program -##therm -##ves -received -comparis -indicated -practi -##itu -useful -length -liter -know -focus -volume -##roc -##line -##oy -complications -##ectomy -them -given -elev -content -refer -##tial -skin -diabetes -##pr -tak -##tine -##monary -##ressive -##ling -##pec -monitor -abn -evaluation -sour -metast -visual -##ortion -##romb -##ibility -drugs -26 -ver -density -larg -features -##cence -adult -recover -average -independent -##ols -likely -even -correlated -heter -efficacy -scale -ill -valid -according -hc -immune -##ysis -synthesis -tumors -cat -continu -quantit -lig -dam -impair -mar -separ -compounds -diagnostic -acet -hr -##abil -##enz -injection -abnormal -applied -##ones -play -question -characterized -coh -often -apopt -orig -##uble -recogn -##izing -thir -ventr -good -demonstrate -initial -cop -off -##vent -oral -##resp -##order -##go -ic -cre -introduc -chall -vascular -##cep -##mit -social -recurr -activities -medic -oste -central -transcription -angi -transfer -fraction -##orders -##ators -##tanding -##oung -might -young -##vement -spectr -27 -##medi -least -paper -mix -45 -made -##mic -##orption -failure -tissues -glyc -pa -##otox -predict -pd -util -##ored -measures -##tle -cellular -##ising -carr -##ecting -underw -enhanced -represent -cs -##enced -32 -measurements -receptors -##rees -disorders -score -transplant -biological -nat -female -aden -areas -##thermore -furthermore -experiments -cri -procedure -##ledge -##yro -food -##ask -##ta -stimulation -asp -understanding -cur -epid -techniques -subsequ -literature -times -regulation -successful -knowledge -##ales -##art -90 -discussed -emerg -moder -strains -rna -diss -adul -80 -minim -regression -31 -observ -##ables -##work -combined -regions -degrees -36 -processes -##rob -position -repe -dynamic -##tib -##omal -approximately -##lic -tests -since -fin -ed -oxygen -few -underwent -baseline -arr -patterns -29 -pathway -##ocyte -median -would -culture -##uture -##bers -##igr -events -##rogen -step -affected -##ustr -bm -proposed -##olar -48 -artery -air -prior -thromb -nurs -progn -smok -occurred -pulmonary -resulted -ey -right -collected -confirmed -limited -antibodies -fluores -##ont -biom -coron -ess -##iotic -suggested -##rosp -educ -##opro -antibody -interactions -pcr -##omas -##eness -##erve -antigen -neurons -cogn -##da -##ier -##ochemical -achiev -epith -##ytic -section -ax -##ower -sensitive -methyl -electro -mid -key -33 -criter -constr -##lying -hydrox -agg -##iatric -##otor -horm -##till -suggests -##ru -complete -pharmac -labor -network -comparison -practice -versus -##ler -lack -auth -gam -refl -materials -serv -inhibited -carcinoma -##utr -suggesting -increases -alter -recomm -structures -endoth -application -established -setting -trials -70 -seen -guid -additional -exerc -chemical -##ke -place -hal -vacc -history -down -invas -isch -catal -altern -periph -cer -##ovascular -controlled -##urn -37 -malign -screening -coronary -relev -across -alc -iii -provides -chain -lay -accum -##10 -examination -strain -corr -transp -##ave -mir -duration -adults -scores -displ -electron -##ious -disorder -bir -action -prost -promo -##ploy -depend -mouse -espec -plat -iv -substr -especially -hand -secondary -##onding -future -##ining -##istered -##inetic -mel -training -capac -##ogene -seg -75 -##acts -corresp -selected -eight -proliferation -consum -##rosis -##terol -sil -defined -##nf -rad -##oscop -##oxid -##ethyl -##fusion -##emb -##last -agents -sum -##bral -sle -##ii -alcoh -##iratory -##pa -inhibitor -lat -colon -peak -##grad -detect -polymer -plac -34 -##utes -bar -##ondr -et -02 -kid -recently -help -toward -article -still -##set -ultras -##aff -##ture -desp -##me -basis -kidne -side -##ld -ref -go -ant -despite -##orbid -cours -components -##ying -damage -glut -lit -head -##estinal -lipid -leuk -molecules -chlor -describe -structural -##agon -suff -postoperative -clear -bil -phosphor -critical -improvement -apoptosis -##bry -prosp -mes -fold -hip -peripheral -diffic -##eph -cost -##ibr -graf -##tre -adap -criteria -distin -##ets -metabolic -##agen -fer -dop -kinase -ion -necess -chann -##ancre -real -##isting -##yroid -signaling -exercise -##ochondr -chromos -populations -calcium -don -peptide -bio -infected -provided -daily -##iting -depression -doc -alone -nerve -differentiation -##ectives -aged -task -strategies -##odes -load -prevent -trial -sem -##occ -great -affect -ra -series -functions -##ergic -does -therm -rang -na -od -##esth -interval -ampl -fet -activated -consistent -facil -point -situ -##entially -embry -concer -sn -##anol -essential -perce -##pre -testing -42 -eng -##iversity -retrosp -cou -frag -sod -##enting -elevated -moreover -##ints -##50 -acids -38 -employ -resulting -cognitive -importance -reduce -plant -community -hours -##ems -top -pancre -prote -absence -onset -mitochondr -cryst -glob -marked -pregnancy -simple -##ove -myocard -medium -rare -##ather -products -linear -implant -convention -degree -##otid -##umin -survey -##ptake -questionna -remains -achieved -log -##omer -transport -las -lines -access -reli -sci -particularly -migr -seven -infections -##bo -material -##oint -microm -natural -##ulate -capacity -transform -sulf -##oids -toler -lear -##function -metabolism -##ca -pig -cytok -##trans -##arding -##ument -diagnosed -##ophil -##ople -##olesterol -fract -power -##ef -##uter -extract -procedures -older -problems -##etry -##ayed -##pri -states -open -markers -##ivery -transl -transm -require -##ogl -fluid -dig -doses -appropri -##dom -quantitative -##arb -delivery -ear -bacterial -##oster -##12 -exposed -examine -label -underst -nutr -upon -##arr -recovery -wor -##ics -conventional -highest -magnetic -uptake -carbon -tog -stable -progression -##utaneous -biops -undergo -variables -reproduc -together -clus -pathways -##put -##ortic -experience -proportion -induction -##ched -cardiovascular -course -calculated -stro -rest -##thr -##imens -experiment -stem -statistically -sequences -administered -##icians -strength -gamma -maximum -cycle -##nas -irr -##ogenous -origin -##atitis -consequ -pair -##uff -##se -act -##isms -component -kidney -##itional -adverse -estimated -iss -##encing -respond -whole -host -nuclear -regarding -summ -people -alcohol -ge -##brid -cohort -polymorph -##icial -##eti -respiratory -resistant -internal -bro -##oz -antagon -pp -65 -wall -linked -##ession -carried -tempor -public -##gl -much -198 -prefer -lact -objectives -ventricular -include -safety -inflammation -##inity -43 -little -needed -03 -##idine -39 -mutations -domain -##anced -wid -inhibitors -variation -##titution -mak -##osa -##istry -mamm -sleep -##rolog -environmental -##ank -hypertension -special -severity -best -modified -##other -fas -terms -birth -##anning -dimension -systemic -laboratory -##rix -distinct -specificity -protoc -##hydro -exhibited -##pling -hb -animal -ped -##acr -along -motor -subsequent -##activity -bp -taken -##ered -var -ster -relatively -endothelial -stroke -##roscopic -ng -identification -pt -authors -const -recorded -problem -fix -home -pm -cholesterol -alg -##ceptib -monitoring -44 -countr -evolution -##esized -chemotherapy -##encies -appar -selective -conn -radiation -microb -indicating -##ense -cys -mt -##cler -having -55 -##oding -##bp -atp -susceptib -bal -randomized -##ulating -##uring -spati -his -dysfunction -star -##ea -##ococc -##sh -acqu -late -yield -amount -macroph -degrad -reson -spe -wide -incor -appropriate -hypothesis -##inated -limit -##atives -traum -restric -mc -##eff -relevant -confidence -liqu -##io -able -moderate -##que -thick -source -physiological -measurement -chromat -bacteria -bound -remov -recomb -##onomic -platelet -##dl -excl -##otypes -interventions -##utive -causes -microscopy -treatments -##mediate -designed -tom -composition -underlying -##ame -ht -hs -sd -##oral -arterial -double -corresponding -matrix -72 -intracellular -adoles -##fact -mainly -grade -efficiency -##kn -compare -accuracy -approaches -41 -hybrid -males -stre -implement -hepat -quantif -intensity -gran -occur -soci -ven -substan -alk -contex -attention -prevention -cp -continuous -means -##rin -contribute -##urb -search -cos -46 -##itude -cerebral -females -##ights -strategy -##oxy -##uded -antibiotic -diabetic -mental -trig -##fs -##ressing -education -wild -difficult -famil -myocardial -alternative -anterior -developing -later -necessary -repair -gastric -terminal -##hood -world -##ometry -selection -cho -reach -though -##oprotein -adjus -##uration -detail -half -secretion -reviewed -shif -arg -memory -ter -frequently -tnf -chin -national -##amide -##neum -extent -99 -images -susp -promot -##anes -##ending -sodium -predicted -shows -chol -opp -##yth -##ique -infants -##ams -04 -stability -stimulated -##ariate -shap -##enty -directly -cd4 -52 -students -regard -repl -##oglob -bur -forms -challeng -genome -intake -transition -##rine -color -imm -appears -tool -##activ -##ished -##ometric -##ges -##illary -gel -persist -third -toxicity -##ool -adren -full -epithelial -incorpor -inh -larger -leading -facilit -47 -lesion -err -interfer -pi -hepatic -instr -isolates -generation -revers -plasm -hormone -##orn -mil -she -rs -view -resolution -coli -prospective -diam -last -##cc -processing -##yg -multiv -tetr -deriv -led -##iety -iso -reports -bo -transplantation -##izes -viral -atr -ta -generated -must -##ady -applications -##orbidity -neurop -67 -cand -##11 -##ee -eu -##eal -involvement -liquid -americ -56 -questionnaire -inhibitory -datab -regulated -metal -aimed -vas -##ils -eryth -##tively -fluorescence -simult -##plicated -placebo -##icle -pneum -mri -allel -above -strongly -##orrh -enzymes -simultaneous -ever -complexes -accumulation -prepared -learning -reactions -54 -rather -##ench -##ectiveness -adhes -##etric -analog -remained -##ened -##ering -enter -variety -lys -alb -##enic -##obacter -##ings -ca2 -gast -context -64 -##ope -investigation -effectiveness -urinary -advanced -eas -unc -sperm -candid -smoking -categ -exist -##lu -anx -mutation -tb -conj -clinically -unique -comm -49 -resonance -spinal -satis -reconstr -rab -yet -statistical -oxidative -unit -myel -princ -reference -##tile -53 -plants -##uv -percentage -nerv -dm -haem -##ector -##venous -stages -nine -substrate -reas -incub -hp -cath -##ively -profil -malignant -vess -disp -##tially -##ecutive -soft -##ga -finally -rt -ss -optimal -grad -lateral -kinetic -##olip -pyr -cortex -mechanical -##ars -bond -resection -subun -ns -specif -##land -removal -emb -agent -partial -systematic -lum -read -##ots -laser -publish -layer -##artic -specimens -responsible -pb -longer -dimensional -maternal -ann -introduction -85 -profile -transmission -significance -extre -##man -supplement -##oxide -##ella -##arily -white -##la -paras -unkn -##opathy -services -iron -squ -fatty -unknown -cultures -efficient -recurrence -##orting -certain -ten -mild -interest -asth -collagen -call -spont -image -mob -cas -##lation -trad -fish -##irus -induce -##ired -dog -clos -spontaneous -produce -51 -contact -57 -pass -highl -now -way -mitochondrial -##iology -58 -##aces -retrospective -affinity -##otide -##ectal -proj -##tral -##estern -consumption -product -62 -##aine -auto -commonly -##acch -assays -spatial -neural -infer -advant -invasive -fetal -fluor -interpre -##onucle -published -weak -##inations -rot -ure -##zed -replac -trend -##ula -##onia -##hold -majority -antic -63 -##ident -neon -ray -aw -predom -interview -numer -##otrop -rou -center -fav -olig -currently -germ -obes -global -ble -cytotox -##tation -nature -dynamics -cerv -extracellular -##erative -##astic -##ients -##den -intern -medicine -fed -tri -mutant -regulatory -infusion -formed -nanop -temporal -obesity -event -##elling -operation -subst -irradi -divid -implications -##dominal -aer -hemat -constit -fit -sexual -basal -potentially -extern -prostate -differential -coeff -96 -##itr -slow -occurs -upper -vitamin -aggreg -anat -##pled -specifically -countries -force -##ieve -intestinal -asym -59 -pg -66 -reactive -##ohist -##for -melan -ste -##phal -constant -near -agre -dil -marker -##o2 -68 -attrib -receiving -##itable -rr -points -staining -chromatography -neurolog -biopsy -histological -##arin -thyroid -cu -confirm -##ify -relation -##itary -segment -norm -prolong -algor -micros -manner -##erved -reducing -algorith -profession -##lyc -recommend -solid -immunohist -western -sa -anesth -gender -ves -tend -electroph -den -##alian -##obic -posterior -successfully -##most -involving -generally -aims -attenu -expected -become -gastro -morbidity -twenty -##aged -fram -broad -completed -cultured -organic -base -toxic -agon -##ectin -degradation -acetyl -ech -located -73 -assist -molecule -past -pse -dietary -##itone -adip -sr -core -avoid -resour -another -frequent -##ocal -intravenous -##tration -cad -odds -mixed -barri -cervical -prognosis -tract -relationships -rib -tel -purified -##ocl -external -matched -anxiety -aortic -##ytes -##osin -radical -widely -##cers -cb -##less -members -##ching -benefit -thres -oh -ds -abnormalities -abdominal -humans -cf -##tility -dl -joint -orth -benz -##onin -pseud -theory -##eta -neuronal -tomography -ifn -##hyth -recru -conclude -0001 -undergoing -predictive -##uls -absorption -ads -igg -attem -disch -person -promoter -##ories -eye -living -78 -##ema -adjusted -divided -thor -wave -##osing -manif -assemb -##sis -##ared -map -prognostic -hydroxy -flex -rabb -##odynamic -leg -retin -slight -##gens -cc -conform -space -##ields -almost -pathogenesis -consecutive -determination -particles -fo -comparable -interf -##oring -altered -vary -##eletal -far -subc -61 -peptides -##ontal -associations -##elium -residues -contribution -##lasts -defin -graft -##otion -diameter -optical -##to -##enge -complement -##uts -remain -around -inser -##olytic -##53 -##arian -variable -oxide -gh -##ections -##adder -account -elder -rob -aspects -sed -alterations -76 -throughout -feed -du -classification -74 -chromosome -accurate -##he -##ls -##aryn -##onch -channel -soil -##alk -69 -accept -rev -superior -##aring -##crim -phosphorylation -nanopartic -##elines -respect -usually -document -impaired -pancreatic -##olved -ga -hypox -##ounts -protocol -antioxid -gal -ligand -exch -##ico -promising -recip -##ogenetic -hydrogen -elements -pk -##esia -heterogene -leads -concept -recombinant -##16 -##ologies -lymphocytes -behavioral -recognition -convers -thickness -98 -migration -ful -phenotype -challenge -nas -gest -86 -allows -sources -roles -##aves -spl -discover -myc -83 -discrim -every -vaccine -lap -wound -fung -emph -urine -plus -##osyn -82 -behaviour -77 -##osite -##tering -92 -##rot -ana -tumour -ratios -proxim -elderly -##immun -vel -asthma -making -units -84 -impairment -uv -guidelines -ow -ultrasound -deficiency -ty -plays -elim -school -##isc -hipp -som -##dle -##itud -88 -cortical -71 -obstr -fast -membranes -requires -rapidly -infarc -cord -occurrence -##ats -root -pac -phosphate -extraction -##ori -repeated -bronch -basic -adequ -polar -synthesized -cereb -por -sph -##ention -spectroscopy -##uary -profiles -appeared -mode -##osine -nf -pen -spectrum -numbers -macrophages -cis -biochemical -fragment -93 -##ermal -multivariate -##erex -gp -neut -pla -computed -epidem -##ectivity -device -##lo -epis -md -inn -abund -traditional -##reh -97 -decision -needs -##iation -##13 -polymerase -extrem -explore -##ocamp -##isation -marrow -occl -potent -hepatitis -until -enrol -##ophys -87 -heat -europ -pal -79 -cv -offer -pathological -##acchar -##apping -safe -signs -##imen -subt -compl -94 -phenomen -##flu -signals -manifest -##gramm -illness -variability -##icles -predominant -programs -##used -cav -nod -nucleus -##ral -##pati -descrip -physicians -##ring -tc -##inate -depart -employed -compreh -poll -observations -completely -assign -aud -nh -endoscop -biomark -nursing -final -excess -oxidation -eyes -indicates -trauma -89 -agreement -##oration -##ucid -media -experienced -deep -veloc -clinic -excell -probably -costs -donor -construc -##rophy -inst -##phen -recommended -##bal -polic -make -pediatric -##clerosis -##let -divers -bu -##truct -##aptic -hippocamp -technology -neph -amb -recurrent -europe -pet -orient -spectrom -fibrob -##rs -bmi -occup -genomic -preoperative -estimate -compr -h2 -arch -assum -##azole -middle -morphology -targets -understand -polyp -protection -perception -##ologous -##erexp -##osynth -minutes -##ompan -##orter -fully -organization -div -##ialysis -apparent -find -operative -hg -metastasis -exce -susceptibility -##allel -what -##ulatory -stabil -accompan -tolerance -presentation -protective -onc -dogs -families -dental -endogenous -conver -noted -fusion -pan -##ylated -##thritis -analysed -np -towards -feas -location -university -earl -##ortun -adhesion -enti -##eds -carri -turn -overexp -coord -##tract -emotion -morphological -minor -motion -blot -pretre -contained -defic -excellent -##olds -##20 -providing -##teen -##ework -##eling -close -preparation -##way -##osomal -jap -##by -computer -issues -neutral -lapa -##osal -fa -rich -81 -distal -randomly -neoplas -##ervation -cytoplas -domains -##ester -volun -transf -afric -actin -defects -##ember -ischemia -bleeding -##tp -##erence -##group -reconstruction -##oked -framework -sectional -movement -##ercul -progress -ros -##iaz -independ -prolonged -csf -unl -##ged -american -##osterone -nons -##oline -stimuli -elucid -ovarian -lc -haz -##ives -microgram -extensive -thym -##cents -intact -benefits -##ancies -ben -none -caro -prem -ethanol -developmental -existing -intermediate -camp -##iform -fund -ather -##ict -milk -distance -##ocar -##ata -neck -##itively -theore -programm -delayed -deliver -satisf -necrosis -venous -##eli -##bl -##18 -aug -word -ischemic -oil -enhance -fif -##fa -whose -schiz -band -database -seems -az -sat -adolescents -feeding -##apse -emergency -##ician -##olysis -injuries -parallel -below -##ophag -typical -cruc -logistic -smaller -##asia -##ibly -choice -suitable -immunos -address -except -soluble -immuno -practic -##atment -##thers -synthetic -infarction -##ione -element -possibility -##aline -##imer -nam -mn -cancers -medication -##14 -transient -fracture -nervous -routine -bilateral -put -##estions -au -lang -accel -har -nanoparticles -##itoneal -showing -labeled -targeted -exchange -##itals -regen -correct -##ochemistry -curve -la -princip -##ream -service -reliable -##organ -##thal -hazard -maximal -##eg -serious -mill -hund -diffusion -clon -hundred -added -propose -growing -##esh -preven -ker -attach -##phosph -varied -demographic -observation -sequencing -3d -colum -##ococcus -##ophage -working -bladder -lv -##anth -gradi -fm -##cles -tun -egf -pv -##lycer -regional -cytokines -congen -cox -venti -gain -igf -perme -##gans -500 -mp -##opic -coupled -precurs -uns -lps -nurses -##ump -channels -vag -concluded -cyp -others -##ra -genotype -radio -comparing -##oglobin -primarily -hf -##igen -autom -cortic -break -oct -##iral -##oxyl -91 -pf -smo -dele -uter -ions -achieve -##ocation -markedly -dc -parents -skill -##taining -##thern -psychological -relax -##tenance -interv -fibrosis -ak -ip -run -fall -ens -trigg -zn -institution -therapies -maintenance -ring -antigens -thre -##anial -minimal -neutroph -tgf -phyl -amounts -conjug -additionally -##actic -nucleotide -##rophic -tubercul -epile -recognized -##idal -commerc -subject -native -blind -dors -cut -ranged -shift -197 -##aria -jan -tryp -##care -##rich -##osomes -progressive -diversity -##jection -metastatic -##leuk -##usc -proximal -##angl -classified -japan -industr -##inking -##stream -knee -bran -angle -infil -subgroup -injected -##ycin -retinal -##urg -tit -##ivalent -##15 -benign -periods -mh -##rocytes -thermal -port -maintained -bov -uncle -smooth -hospitals -t2 -antagonist -international -asc -electronic -seiz -3h -shape -antioxidant -solutions -eti -deficient -irradiation -musc -node -threshold -##genic -complication -##cal -subtyp -partially -invers -##tingu -communication -fiber -##itus -longitud -replacement -behaviors -contain -##urine -substantial -earlier -##orage -caps -failed -probe -never -##ament -occurring -monocl -algorithm -catheter -##erent -##ested -variations -unclear -kda -##ili -localized -spectra -##entical -##cher -reverse -##urys -scanning -describes -microg -vac -delta -implementation -perceived -tools -closely -exhibit -velocity -dex -subunit -##ophren -ru -nitrogen -tl -##atib -extended -sufficient -latter -subsequently -devices -cn -intervals -depth -shock -arm -##ias -hla -##ogens -united -enrich -distingu -metabolites -gc -neonatal -regular -enrolled -qualit -hydroly -interesting -schizophren -seem -influenced -prediction -hydroph -chinese -##letion -##apl -analyze -sensory -separation -tm -independently -##we -fab -##hythm -localization -hd -pathology -##mitted -adjust -ig -spin -identical -focused -estimates -beg -hemorrh -disturb -excre -##argeting -counter -##itiz -rise -opportun -healthcare -matter -amyl -surge -##17 -atrial -##ophageal -obtain -##tics -bul -represents -acquired -entire -positively -intensive -##avage -##aries -lam -vein -##uit -##ches -dominant -##rous -##ises -marg -##ontin -clearly -targeting -skeletal -emp -decreases -##try -p2 -incubation -bis -detailed -suppression -extracted -persons -height -invasion -derivatives -inactiv -comprehensive -cam -interleuk -##rium -thi -##yle -degen -largely -verte -##eck -hab -initiation -discharge -illustr -cytos -cover -perfusion -magnitude -sampling -extracts -encoding -contraction -##tious -fibers -direction -monoclonal -sten -##back -##ozyg -atom -pros -competi -enhancement -records -correlations -improving -january -##intestinal -derm -##elf -promote -seas -risks -psychiatric -fixed -presenting -childhood -crystall -aa -department -crystal -pneumonia -gastrointestinal -error -##19 -##irect -slightly -sinus -##ocrine -thirty -spectrometry -artif -moth -variants -##eptide -gab -thorac -challenges -pregnant -equal -measuring -emission -suppress -egg -beneficial -south -##east -hcv -fractures -done -allowed -interleukin -storage -cytokine -gland -circum -##anine -##ellar -300 -dependence -inclusion -tyros -ane -reason -characterization -producing -dispers -neurological -##aft -crucial -plan -##ohy -##ements -silic -face -refr -##ways -06 -##oside -##read -##opa -perc -numerous -goal -effectively -noise -surfaces -reh -##olec -longitudinal -expressing -adequate -workers -ethyl -opi -predictors -balance -healing -##factory -glutam -contrac -understood -explain -cluster -##oman -##ocarcin -vessels -nk -##elled -##atid -diverse -indirect -presents -250 -##queous -modification -muscles -limb -chick -tex -example -remaining -##athy -meta -parent -newly -possess -##beta -py -highlight -date -##oa -ldl -died -stimulus -allele -lob -##ike -lipoprotein -wors -##isa -ranging -exer -airway -vegf -substit -supported -theoretical -nitr -practices -aneurys -questions -aqueous -arth -tail -wr -##rotein -p53 -##acer -ast -neither -nmr -rhe -##rec -histopath -reper -dos -pel -strept -tuberculosis -kill -bc -##enchym -elic -summar -##otomy -metastases -hex -##itted -max -accompanied -coefficient -validated -aspir -##bc -obese -##ichia -albumin -networks -saline -decline -displayed -sustained -bovine -transduc -whom -##acin -hour -##icin -##isions -##ovirus -recommendations -kinetics -##rog -cd8 -suppressed -organisms -gold -##uting -leukemia -dy -corne -gm -##ensions -##alpha -admission -##a1 -transcriptional -ple -##oin -determining -granul -micrograms -##certain -polymorphism -##ades -immunore -ts -simultaneously -economic -##kin -amplitude -##ires -vector -##transfer -pulse -introduced -once -robust -persistent -##rief -##atase -residual -transcript -explored -dopamine -organs -##ologists -assigned -contains -##dehy -μm -aging -discussion -##part -##dm -particle -systolic -taking -spread -teach -##lim -##ners -mmp -##itory -controlling -consequences -mothers -##ifications -atheros -younger -arteries -##ipl -swit -##patient -considerable -##anding -conversion -coupling -##cribed -phenyl -consisted -frequencies -termin -lymphoma -acceler -mutants -mv -diast -engine -##ows -valve -##sin -##ycl -##itant -congenital -##ilateral -output -appearance -mis -thought -tes -##operatively -##okinetic -substrates -integrated -affecting -impl -investigations -directed -chest -120 -antimic -defect -salt -capable -green -gir -hm -##illin -vul -poorly -antimicrob -personal -fill -colorectal -ket -##rose -possibly -speed -##ulum -ni -langu -resources -##ina -##be -pool -delay -limitations -##ectious -##ensin -##fr -volunte -##ton -##enes -return -axis -facilitate -mammalian -replication -thin -ethn -functioning -mand -depending -reflect -favor -##oglyc -documented -##eless -##omycin -##udes -##otypic -attemp -##urally -count -clar -dehydro -kapp -pretreatment -reproductive -nad -emotional -125 -wt -##ush -called -##np -##mediately -##xt -2000 -##ocardi -volt -clearance -persp -demonstrates -pharmacological -antibiotics -elisa -black -##ban -##osens -##oe -microbial -##uvant -##ecal -lar -graph -rehabil -electrical -circulating -escher -##enia -immediately -mineral -dual -##uctive -autoimmun -fibr -t1 -bow -escherichia -bat -amp -separate -fresh -lh -ib -influenz -agonist -##wide -sections -move -##oplast -pack -recording -cod -07 -rod -##bor -##oved -cd3 -##bar -rein -radiotherapy -hair -150 -affects -explained -efforts -characterize -nasal -retention -##illus -##unding -##oic -rp -validity -reliability -original -gangl -substance -varying -##plasia -sarc -##iciently -hypoxia -expans -infectious -placed -transformation -ck -m2 -phospholip -inoc -fluorescent -dend -fractions -creatin -cul -lowest -satisfaction -##get -buff -hpv -initially -##otropic -adsorption -staff -settings -distr -conserved -modulation -implantation -carotid -methodology -##exp -brief -##oidal -charge -simulations -proph -##upp -insuff -##iveness -removed -fu -pairs -##ocomp -induces -attit -ground -adop -seed -react -##anged -plate -##izations -feature -##cephal -gradient -##avy -probability -simulation -reactivity -hcc -schizophrenia -scat -##oblast -glomer -concern -gon -##ads -availability -##gs -serve -08 -repeat -##avel -cytoch -mim -gi -qualitative -retro -##oresis -##treated -reached -identifying -ulcer -arthritis -estrogen -infr -peroxid -puls -burden -junction -upreg -cyst -##osity -##acet -hybridization -coc -tyrosine -subjected -##ophosph -##axis -##itin -murine -minimum -vaccination -occlusion -##ze -mature -knock -duct -disability -jun -dp -cyclic -##b1 -mixture -words -plastic -glycop -restricted -suic -yeast -receive -##uria -cdna -##electr -comput -coding -echocardi -centers -casp -concerning -fragments -##aphyl -adherence -##ocytic -equivalent -planning -deaths -language -decl -rank -##cap -##insic -unf -proved -##aper -spr -400 -lt -zinc -nearly -rehabilitation -eb -compart -lep -##ostasis -column -##hg -advances -changed -actions -temperatures -regulate -users -hearing -heavy -isolation -tax -barrier -##acl -##atically -advantages -similarly -sf -deletion -gap -biomarkers -attenuated -##acent -pathogen -includes -##entia -##omet -##sa -2010 -display -physician -policy -urban -woman -pelv -##opo -european -assessing -practical -steps -cytotoxic -concom -pure -embryos -admitted -catalytic -belong -fibrin -ce -summary -involves -tp -##pass -##ware -antimicrobial -skills -##usive -##usal -##accharide -parameter -biosynth -dip -rating -gaba -thereby -methylation -collection -##forms -ligands -walk -express -##25 -epithelium -stay -constructed -##dehyde -phenomenon -issue -cycles -##illance -stenosis -viability -tube -modeling -rar -manip -draw -bile -pathophys -composite -scientif -phases -scan -fibroblasts -cartil -##co -infant -concomitant -prevented -##err -##wh -##isp -prophyl -released -ages -leaf -##anded -sensor -fixation -medial -bias -##olin -treating -accoun -endoscopic -discre -nodes -##wise -##oscopy -trim -standardized -sera -variance -composed -##itol -monitored -austr -evoked -depressive -untreated -femoral -symptomatic -dementia -mb -shorter -absol -surveillance -##nt -bin -##time -pharmacokinetic -antis -ba -##ilities -implicated -##ohydr -surviv -p3 -transpor -##omers -##40 -sulfate -advantage -stere -##issions -dry -##onas -reporting -immediate -saliv -fields -##ash -supr -deposition -##aemia -consisting -segments -retrospectively -overexpression -sple -glycos -polymorphisms -variant -trunc -mi -distributed -forming -nico -si -melanoma -gli -##ache -analges -##path -##acial -##transferase -viruses -dissoci -attributed -bowel -instrument -db -blocked -rheum -##iologic -suspected -##inning -##idin -foot -inters -cg -iod -improvements -μg -give -glutath -remark -##iance -dorsal -china -utilization -generate -room -driven -north -spectral -##chron -deform -flav -differentiated -sti -metall -pitu -gover -##teine -comparative -sets -zone -excretion -experiences -sir -##oon -dimin -##ril -##abilities -lens -tasks -adjacent -expansion -assisted -aggregation -own -nitric -aid -assembly -depends -mell -volunteers -ing -nt -locus -rabbit -asymm -overl -applic -##ensus -pigs -stat -progen -interface -dram -multid -scientific -ld -relevance -##23 -##mentation -##pler -increasingly -sedim -##otr -dehydrogen -fol -negatively -libr -##ogram -undert -##pan -predominantly -influenza -##omical -##angi -rout -##geal -limits -rural -anesthesia -adenocarcin -09 -##ilib -restriction -separated -glutathione -apoptotic -referred -became -breath -socio -##term -pu -parad -operating -traits -glutamate -contamin -next -clusters -##ilibrium -hard -take -elevation -embryonic -##plc -##des -tg -nc -biof -susceptible -immob -sv -nuclei -extremely -donors -forty -drinking -##ury -tur -compet -detectable -##mp -candidate -commercial -pic -pituitary -##ulus -immunity -evaluating -##lets -synaptic -interpretation -insertion -laparoscopic -bor -mmol -aids -autoimmune -miss -check -errors -steroid -substitution -##ks -tumours -cx -##eting -2d -cum -cytotoxicity -prosth -extension -wa -quant -recruited -waste -electrophoresis -cataly -loading -potentials -facial -recipients -prelim -osc -curves -##ips -valuable -diastolic -acetate -bypass -##acting -nr -putative -##gar -relaxation -recovered -easily -cytochrome -##water -##57 -ub -sho -anomal -absolute -pil -episodes -expect -##aryngeal -multic -mucosa -analytical -bind -proton -bd -angiot -cry -professional -threat -tob -counts -obstruction -gram -african -decreasing -xen -pathogens -vessel -twice -glycoprotein -calculations -transgenic -abuse -subcutaneous -##agulation -lymphocyte -adrenal -house -veh -hdl -always -continued -##ging -feedback -##ocytosis -grown -##hs -articles -##ruption -##atif -causing -uncertain -maintain -mostly -##hydr -ect -encoun -##urance -mellitus -hence -##bre -##era -##oints -bac -regulating -synthase -biology -angiotensin -reasons -2009 -preparations -cavity -cartilage -spleen -##acco -december -chains -pron -emphas -acad -adaptation -define -film -##veolar -##yte -live -incorporation -##tor -##aken -integration -art -ju -interviews -co2 -##ensis -fing -mitochondria -##abs -typically -##ptomatic -deficits -nutrition -fewer -clinicians -distinguish -correspond -reduces -penetr -cla -estr -considering -heterogeneity -greatly -utility -regimen -##isition -##rup -solvent -track -phylogenetic -1000 -2012 -angiography -gs -layers -reperfusion -##onate -categories -emerging -##fp -##igible -preliminary -cond -ester -quin -existence -indices -##ayer -predictor -scal -aure -##opol -reviews -residents -##atig -fever -excit -##ifying -adjustment -smokers -hydr -tobacco -copper -hypothal -adjuvant -cold -acquisition -##60 -decomp -##ematic -correction -blue -##igm -viol -communities -sept -pert -##oxin -epileps -list -strom -retic -##1a -oscill -##mental -fatig -buil -veget -##gi -compens -utilized -quantum -supplementation -extra -precursor -bearing -injections -##etal -orb -fabric -interferon -##dp -##alities -adenosine -##phenyl -maturation -cytoplasmic -sup -definition -mer -immunosupp -align -caspase -cns -##gal -plasmid -displac -software -items -contents -identi -hypothesized -voltage -##assium -clim -unus -focal -##fe -2011 -pall -##draw -cleavage -nonc -inferior -rc -outs -##vascular -project -orientation -##amous -e2 -withdraw -dehydrogenase -##amental -##ley -serot -toxin -carbohydr -potassium -true -asymptomatic -classical -arom -mening -loci -intrinsic -motiv -beam -conduct -##ophen -aureus -nutritional -ventilation -insight -ace -##adiol -jud -eosin -immunohistochemistry -##omat -stratif -##yc -influences -park -distress -gall -tib -yielded -teeth -hsp -loop -##ready -dialysis -##mark -##atidyl -already -config -perf -##weight -chloride -##amination -##ears -aggressive -genotypes -mucosal -requirements -unch -requiring -interc -mach -##70 -##uments -##dna -yo -##chem -regeneration -pyl -cutaneous -auditory -mobility -array -##min -##idase -bcl -exogenous -##ister -pump -fc -scales -##ilization -ln -classes -##22 -##oden -adaptive -dihydro -aort -predicting -input -amyloid -motif -validation -coch -2008 -amph -2013 -mapping -##ux -##ylate -differed -##enchymal -##choline -transfusion -bey -ah -staphyl -options -epilepsy -estimation -##uch -enzymatic -##phrine -evident -beyond -precip -gave -participation -relapse -neurot -pent -##ipp -steady -educational -carcinomas -trends -mu -rabbits -sched -dh -brom -sul -##ette -##rovers -inner -hemorrhage -##oyl -herein -bip -insights -arab -phyt -##30 -protocols -##olecular -lipids -inhal -##ief -##ulative -##eses -examinations -reversed -##iness -##zyme -cirrh -barriers -substances -abstract -scaff -eg -##oplasty -movements -remission -undertaken -fundamental -hz -permeability -secret -interact -002 -##max -remod -corneal -careful -lamin -rig -##fluores -salmon -worldwide -formal -mmhg -society -hemoglobin -pn -argin -vide -##ospor -parts -discovery -intram -trop -immunoglob -shr -seek -controvers -immunohistochemical -thoracic -medications -##areness -superf -macrophage -stent -overc -fatigue -correlate -hemodynamic -phosphatase -ocular -embol -##opathic -precise -##azol -##iction -##urated -positions -##edic -##rounding -electrode -fals -2014 -lactate -regardless -2005 -uterine -vesicles -careg -cation -inducing -heterogeneous -acting -principal -author -starting -glass -anth -practition -determin -ammon -delivered -implants -enab -frontal -##adily -epidemiological -deb -heparin -member -uniform -aberr -indicators -dt -simulated -mor -epit -awareness -cham -thal -allergic -thrombosis -war -offers -films -##ritic -comparisons -##umab -consensus -##ought -##emat -micr -pathogenic -inhibits -##point -bodies -surrounding -nar -squamous -biopsies -##mod -##aints -empir -prep -subunits -truncated -schem -cultiv -##ensities -2007 -science -##arch -rational -lacking -hplc -##ime -homeostasis -concurr -peg -##ibration -p4 -parental -fetus -environments -##tead -osteop -hapl -feasibility -itself -pelvic -rupt -substantially -traumatic -gestation -residue -spine -professionals -mast -annual -labeling -similarity -creatinine -providers -2006 -technical -leaves -##nel -amplification -subjective -##avi -attack -cardiomy -hbv -etiology -##elihood -allowing -govern -testosterone -detecting -promoting -polys -##arrh -consistently -manifestations -##heimer -intrac -##tisol -combinations -abundance -alz -##aden -c3 -attr -sought -virt -opioid -sizes -mono -##ota -weighted -quad -retri -##aceu -ram -operated -capillary -implemented -disapp -malaria -lp -modifications -##adian -##azine -hippocampus -##anti -dimethyl -##ionic -fasting -clones -##inson -instead -sepsis -uncom -gestational -concerns -artificial -placement -##mc -esophageal -##ounced -o2 -##alc -##onuclear -initiated -team -fd -stom -resting -proven -supporting -##graph -photo -guided -##eded -hydroxyl -hypertensive -coverage -transcripts -version -alzheimer -005 -identity -ones -2015 -##epine -##oxic -gut -income -dimensions -occupational -##osome -##k1 -nanos -outer -prevalent -percutaneous -depletion -likelihood -translation -anaesth -##ectors -land -preventing -hydrolysis -interestingly -circulation -rb -born -guide -chond -promin -peritoneal -compression -compliance -##otides -diffuse -##apped -fifty -eeg -##lin -option -challenging -colle -sympath -##ulties -supports -symm -equilibrium -idi -biofil -adenocarcinoma -accom -namely -contributes -##ypt -##icients -seizures -ubiqu -ras -digital -attitudes -antagonists -abundant -coefficients -##down -cann -leak -respondents -##opausal -virul -microscopic -researchers -insp -##oming -consists -created -aneurysm -raised -om -ot -extens -##itect -pronounced -deviation -atyp -ho -##ietic -lost -evolutionary -bot -quantification -recruit -unusual -##uron -hippocampal -##bi -nonin -arrest -false -##uity -##essions -girls -##otoxin -structured -yields -pren -speech -beginning -##ophilic -leuc -ips -ent -databases -tt -##load -lumbar -anatomical -ile -limiting -bic -biosynthesis -##uous -absent -collab -inactivation -cig -atpase -##ocys -triglycer -isot -ablation -eps -indications -doppler -resolved -vaginal -##ued -instit -##entricular -difficulties -tooth -reading -abol -##eps -parasite -uses -whe -fungal -plaque -rheumat -##astom -##ry -exists -gd -##lip -##ameth -japanese -ball -##nia -standards -participated -##ba -auc -phenotypes -constitu -homogene -survivors -ald -exact -efflu -atherosclerosis -trip -°c -##mented -##orous -##ilic -##90 -subset -rv -##going -fore -osm -changing -##titis -complicated -malignancy -quantified -cortisol -wk -mess -##27 -##ibular -perspective -brid -##ana -postn -tlr -rarely -unchanged -##80 -screened -complexity -chi -gn -galact -unilateral -stimulating -leth -flux -##lastic -##assay -fist -wash -egfr -##pread -cumulative -coated -##ss -interference -bed -ath -##iasis -northern -li -alleles -grafts -##ozygous -wides -vehicle -potenti -##onography -selectivity -habit -widespread -ank -ways -monol -##uscular -##rocyte -vic -diarrh -refract -vertebr -start -intes -portion -german -absorb -dich -favorable -##ilized -situation -decisions -mother -hormones -representative -elimination -entry -ask -##omonas -##iae -hospitalization -attachment -##inea -visits -resource -finger -feasible -##anted -neigh -decades -2004 -optimized -carriers -kb -arginine -##acranial -synerg -actual -hydrophobic -enriched -##opoietic -dosage -inhibiting -perman -discontin -##ni -princi -conserv -##tures -conflic -academic -##ocortic -parkinson -sound -retriev -faster -psychos -##enter -fine -crp -experimentally -degeneration -sud -pylori -semi -technologies -ongoing -discrimination -questionnaires -mann -##urity -flap -construct -makes -##uctase -platelets -obst -narrow -probes -disruption -hors -protease -excision -represented -principles -maintaining -##ded -copd -##unt -ultim -spher -volumes -##uctu -involve -spermat -dn -diets -##ifer -alkaline -dro -tension -phosphatidyl -preterm -##ws -adolescent -fluctu -##ulose -infiltration -greatest -infrared -##eptive -excluded -forces -elicited -vital -##ocular -cystic -2016 -incubated -embryo -scans -onto -exception -##odil -cigar -see -teaching -##elin -##osyl -partners -trained -##ote -depos -prospectively -##ardi -edema -computational -##dep -timing -locations -consequently -##p1 -observational -##mt -##ascul -vulner -drain -phag -distributions -##esterone -emerged -##chemical -vaccines -eating -adipose -efficiently -plane -pts -histologic -accurately -cytometry -equation -sham -##fold -lith -solub -consequence -rotation -##oxygen -##opes -prescrip -##vents -progesterone -attempt -precision -diagnoses -closure -serotonin -##romy -optic -microscop -translocation -univers -akt -determinants -contamination -hor -oxidase -reversible -categor -##agul -phenotypic -##vas -rejection -atm -intraoperative -##holds -cyan -gonad -performing -responsive -preferred -indicator -cloned -larv -##28 -gy -visible -alveolar -elucidate -law -metabolite -subtypes -agonists -##atile -modern -generalized -carrier -warr -##ometer -microc -explan -linkage -##yster -splic -supply -downreg -dissection -ly -genus -rf -colony -assessments -obvious -diffr -anastom -library -synchron -##bb -combining -readily -text -closed -demonstrating -effort -postm -##eed -##ometrial -##oded -reductions -##zing -morphine -##rene -needle -dark -ionic -##itic -##amycin -##ders -bulk -##21 -superoxide -description -##26 -acceptable -regimens -confer -aver -##aster -outside -million -nin -stiff -mom -##tice -carboxyl -hol -sequential -reflex -tab -sclerosis -km -consideration -representing -mos -former -panel -##rotic -ans -accep -encour -hyd -##ah -##ani -##rd -antit -##itting -uk -just -selectively -studying -##enol -chromosomal -trach -architect -quantify -complementary -postnatal -inher -##iph -modes -occas -outpatient -vision -downstream -incorporated -chromosomes -##24 -##ocr -##ocellular -follows -atypical -aqu -estradiol -fibrill -mhc -##agland -##inf -##matic -deter -reveals -surgeons -##itization -##born -##avelength -dd -2003 -##quin -couns -braz -improves -ensure -##2a -filtration -expressions -##ograft -immunoglobulin -addressed -##ieved -disappear -ls -ze -square -##tes -perfor -s1 -corrected -diverg -##related -dox -race -dye -suffering -##day -neutrophils -##°c -prominent -largest -shared -encephal -boys -refractory -##erin -##lam -tag -immunological -conformation -epidermal -route -sb -ger -##mia -##ato -glycer -datas -stronger -##odef -scattering -suicide -sessions -stop -##ret -bonds -ke -##rown -intestine -reinfor -##infl -enable -sediment -priv -ige -sheep -powerful -subcl -examples -embed -##estive -premature -angiogenesis -recruitment -##carb -##atible -homologous -stained -nos -##ison -##iar -##theless -produces -nevertheless -wet -extend -negl -##ocon -withdrawal -respective -mycobacter -enhancing -correlates -bab -repeti -guinea -##tors -2002 -##break -drop -immunofluores -##osyst -rectal -pes -considerably -operations -somatic -microarr -carbohydrate -anemia -##enari -offsp -youth -proliferative -endocrine -immunodef -##au -modulate -antico -tolerated -resemb -003 -organism -intersti -passive -##cine -secreted -nurse -echocardiography -sou -##uly -##ophyl -##vel -object -intracranial -integrity -corticoster -##anned -prostagland -##omorph -harv -##29 -dendritic -portal -qt -histopathological -##amus -dic -proportional -presum -pharmaceu -uc -tor -dw -encoded -tn -gla -vertical -oa -deterior -obstructive -shaped -##electric -formulation -viable -screen -comorbid -##iary -um -differentially -##omotor -##ticular -##ou -diminished -motility -nps -tf -##aving -blast -wind -postoperatively -radiograph -consult -dyst -##oor -commit -easy -noc -preference -wavelength -offspring -##ectom -outbreak -kd -##opl -transduction -fatal -fertil -southern -##ophila -engineering -nmol -##romes -blockade -sixty -##ivariate -dissociation -kappa -##65 -incom -neutrophil -instability -##platin -enl -exhibits -bes -##nea -june -cov -bands -cirrhosis -remodeling -activator -##ided -##adic -chromatin -terti -2001 -situations -reag -##arp -##iplinary -##iol -lungs -##ada -##embr -electroc -accounted -infiltr -platform -rice -sympathetic -buffer -aorta -brazil -depr -##isph -responded -ham -##angement -pkc -worse -scenari -competitive -preventive -emphasis -meaning -pathologic -walking -hepatocytes -transformed -neuros -##otes -permanent -ars -tree -nutrient -cocaine -derivative -##ococcal -##encephal -transfected -##ephal -##ordance -##omeric -tertiary -lac -##orbent -promoted -osteopor -hypers -##ulates -radiological -inos -##xy -dissem -country -histor -inside -managed -centre -roots -##inflamm -protect -shell -cisplatin -width -overview -obl -comprom -doub -##avirus -deficit -mirnas -formula -hl -igm -psychosocial -spor -shear -personality -qol -surfact -##empor -keratin -monocytes -trp -destr -duoden -canal -reductase -##usively -incomplet -automated -participate -prenatal -comprised -elastic -anaer -modalities -apparently -coordination -online -nonline -programme -propag -##ora -gastr -sca -##mg -focusing -glands -acidic -##flow -implanted -tach -chor -overweight -malform -acts -##2o -ft -##pro -metals -season -##alling -##ograms -stimulate -##arged -##yd -excitation -##asone -##acerb -##atum -somat -contributed -harm -exacerb -##uits -subgroups -##times -##rane -atoms -border -##atibility -##off -glomerular -larvae -visit -web -leptin -wat -topical -aromatic -numerical -incident -sen -depressed -arrhythm -##phthal -discovered -##plicit -satisfactory -##ert -mutagen -normally -excessive -##actin -endometrial -fgf -blocking -redox -applying -ecological -dermat -genetically -intrav -cytoplasm -decade -##acycl -diffraction -td -merc -lr -##oxacin -immunization -mesenchymal -pressures -chamber -pathophysiology -nonlinear -minute -manu -abolished -pow -pilot -biliary -horiz -##osus -intraper -pregnancies -lobe -vacu -adapted -item -mobile -urea -yr -##athyroid -gsh -##clc -##ubation -retained -##eptides -replic -##ulmonary -empirical -nd -accepted -sea -atrophy -##inter -math -carrying -patch -quick -importantly -veter -industry -ventral -fruit -span -idiopathic -manufact -innov -##ancer -isolate -evid -rearr -180 -encountered -lifest -cysteine -pancreas -highlights -paired -vascul -tight -nicotine -trich -dyn -candidates -recombination -subtype -secretory -##anal -overcome -##ai -endomet -burn -##affected -templ -interstitial -##ica -practitioners -##phosphate -oocytes -atomic -capture -contributing -potency -assumption -##ortions -biomass -prolifer -critically -optimization -##ropl -endothelium -surveys -staphylococcus -ideal -industrial -##adequ -loaded -##orec -hos -##oustic -binds -lev -inadequ -##asic -apical -##agnetic -hams -started -1990 -##rod -##ardia -cultural -##akes -radiographic -110 -stromal -rheumatoid -##oli -##iration -drainage -iga -tended -alt -stret -stomach -elong -scar -activating -unaffected -mitral -##uncture -enables -##roz -responsiveness -##ero -nano -create -##odon -##cn -pep -serial -difficulty -histamine -microt -let -contral -psa -differentiate -hypertrophy -catech -##inating -manual -lute -category -##ians -adjusting -##osan -inducible -##osures -euk -sib -mood -##ools -glauc -##ocin -spp -perceptions -concurrent -nerves -salmonella -adduc -sugar -fear -trait -regulates -attempts -radial -statistics -alteration -hu -clarify -video -immunodeficiency -processed -snps -##vasive -retard -##oar -isoforms -microbi -modulated -protected -ethnic -eukary -##vis -student -conformational -predisp -coating -##rox -t3 -unexp -biomarker -ki -##rt -##nat -amel -pge -latency -demand -sds -utilizing -dimer -perm -##rile -tendon -traff -posts -continue -immunoreactivity -march -foods -lin -malignancies -##isciplinary -filter -mediate -substituted -icu -newborn -contralateral -predictions -##ometh -cattle -weekly -perturb -exon -##oderm -##opyr -algorithms -acuity -seizure -aerobic -sensing -hemisph -synergistic -hypot -##agic -##avail -rupture -adp -pin -##ci -rrna -cats -##plex -city -##cepts -stiffness -blocks -600 -axial -bipolar -intell -quar -opposite -##lings -histology -believed -##arcoma -microorgan -neuron -##fort -eligible -displacement -learn -asked -##entis -##iable -##ropathy -night -rhythm -antidep -##tructure -registered -digestion -scheme -turnover -unlike -beh -unrelated -ppm -prophylaxis -bf -mab -fourth -guidance -##cious -systematically -virulence -constants -##hd -disturbances -anthrop -glucocortic -caud -##gia -bmd -sun -tau -wheat -##49 -fs -exclusively -optimum -signalling -##orp -austral -neurom -tand -mononuclear -ging -ti -exposures -soy -##asty -medull -xyl -mum -casc -atmosph -coagulation -##lands -prescribed -facilities -tap -sap -compartment -chemistry -activate -1999 -cows -gradually -fasc -##43 -descriptive -jour -explo -tendency -immunofluorescence -flexible -esophag -peric -##elves -##nis -je -microgl -citr -adrenergic -mammary -hematopoietic -##elve -##ees -##ulder -rn -##opsis -twelve -reflected -fitness -androgen -transporter -##mitt -declined -static -##sych -kore -fig -regulator -goals -##economic -bay -peroxidase -climate -disrup -examining -salivary -ef -##umor -##pecific -##56 -tandem -antidepress -tetra -hepatocellular -##osen -uncommon -covered -neuropsych -opening -##eres -##lem -severely -oes -opportunities -expanded -bt -accelerated -impacts -sulph -porcine -africa -equally -antiv -mixtures -shoulder -embedded -answ -driving -fibrillation -##bil -headache -##iking -##pes -##iod -serine -conv -sudden -autophag -discusses -claim -helpful -distinc -autologous -ecg -irre -##odialysis -eosinophil -division -inverse -##apt -##enopausal -controversial -##ynaptic -marine -lifestyle -upd -returned -registration -##aments -##ptic -lineage -##inosa -sucrose -##chlor -##uation -##embrane -supplemented -##ception -##glyc -dros -electric -concepts -frame -saturation -##aint -syndromes -rated -dramatically -institute -ventricle -too -partly -chosen -superficial -##amethasone -genomes -adhd -july -##accharides -##ques -indication -##ariable -building -share -cvd -neurode -configuration -##ectable -circular -ophthal -##opolys -##imetry -##erge -neurodegen -gyn -vectors -birds -##idium -bif -ventil -financ -offered -bud -##gone -shifts -tac -property -restored -tip -c57 -conven -homology -usefulness -investigating -usual -##tidine -transferred -contributions -surpr -quite -196 -instruments -electrodes -chloro -##aste -lethal -modality -paradigm -epigen -aml -acr -suppressor -mov -alkal -construction -deoxy -plasticity -pit -swelling -attached -null -irradiated -tubular -chemok -undergone -distant -genotyp -##inflammatory -becomes -thems -pref -electrophys -box -##icking -themselves -familial -lifetime -##37 -opportunity -##vs -##ocor -##astatin -innate -polypeptide -enanti -mask -##aceae -histone -psori -conduction -subm -##estim -##ocated -weights -sometimes -perin -muscular -requirement -conservative -analogue -raw -fell -ide -leukocyte -##ala -1998 -mda -session -vin -004 -strand -besides -silica -crc -brach -zero -vo -##omatous -round -physiology -rhin -effector -##rim -placental -papill -visc -purification -##omb -strict -eggs -lm -##uate -phe -perioperative -hyperplasia -socioeconomic -college -epidemic -clamp -bol -pca -pooled -forward -##ping -linking -a1 -selen -recordings -sense -##rought -connected -extensively -amplified -drosophila -sick -injured -##aemic -hier -electrochemical -lowering -ia -urg -lup -stored -rub -registry -contaminated -##otoxicity -april -anatomy -homogeneous -hot -insufficient -##grade -##ploid -retina -competition -##ostatic -anion -##3a -##tives -neonates -mf -referral -orally -nodules -unlab -reporter -tens -iq -##viral -fibroblast -anaerobic -kidneys -eh -remarkable -ligament -biofilm -##idity -aspect -lag -##esium -preserved -a2 -deal -cure -hierarch -harb -breathing -hormonal -microtub -gives -son -margin -peaks -aerug -precursors -homolog -##oplasmic -2017 -alkyl -##gt -arteri -favour -##ocyan -microscope -farm -cysts -currents -placent -##ochrom -domes -catalyzed -traj -analogues -adapt -cess -##oradi -cef -##olone -dispar -polymers -##openia -##ologist -defense -encaps -aeruginosa -promotes -##activation -univariate -rect -vaccin -assumed -myeloid -neurotrans -mediators -english -hall -nacl -pore -silver -applicable -triggered -thrombin -schools -epidemiology -interp -naive -snp -luc -acyl -swe -##orecept -##ucted -kit -cement -waves -branch -functionally -architecture -naturally -##cp -expon -nond -cessation -##otal -surgeon -recall -anis -##antic -cues -gait -para -##cape -##ethylene -randomised -electroly -ckd -staging -osteoporosis -mirna -unexpected -sustain -pay -fos -##keletal -upregulated -##istar -append -ankle -abscess -purposes -##odal -bonding -microarray -examines -t4 -prostaglandin -fungi -chicken -tric -spontaneously -phosphorylated -took -throughput -periodontal -meal -circumfer -coval -served -κb -wistar -nitrate -##ipine -cin -##keys -hospitalized -exclusion -gross -comprising -follicular -140 -testicular -searched -##ylamide -crystals -insurance -gb -##hion -focuses -inadequate -domestic -equations -molar -graphene -glaucoma -##oprop -##eleton -illustrate -quantitatively -bs -kind -##io2 -##jun -scr -pharmaceutical -fox -##itized -##inase -dense -olfactory -##iciencies -##ulsion -clean -locally -mathematic -cro -indeed -emergence -mw -prescription -van -rough -verified -maps -tcr -intim -erythrocytes -ranges -inactive -caregivers -##aminergic -geometry -equip -chel -##elles -ala -nl -pseudomonas -specimen -cerebellar -elucidated -lymphoid -cmv -preservation -nmda -surprising -anomalies -##irc -intense -profound -##omics -entit -##ional -dexamethasone -##etized -acetylcholine -ultrasonography -##ague -##gu -1997 -incomplete -approved -##clerotic -##rozen -achieving -separately -analyzing -intermitt -fashion -kinases -resin -##enin -r2 -enhances -dosing -##acterial -##ino -##oprot -##oxygenase -seeking -##ober -mammals -##oblasts -anch -hearts -edge -acoustic -##35 -vasodil -aspart -aspirin -details -reflux -anc -histologically -mrnas -##ming -##ergies -##ighting -##esize -violence -dilution -epigenetic -ultimately -capability -meeting -juven -coinc -accumulated -ic50 -bund -ptsd -catechol -##cale -thresholds -fri -why -vp -gtp -p450 -neoplastic -##vant -expert -partner -ecosyst -##olarization -oligonucle -coping -tris -colonic -macro -capt -agric -communic -##otub -##ofib -parasites -##antib -front -balloon -antitumor -relaps -decay -p38 -invent -crude -##ellite -##opulmonary -jus -##idae -pancreatitis -frozen -clone -rnas -completion -sar -trace -neo -hind -agricult -pneumoniae -calculation -micror -sacr -##inergic -##aking -enough -enrichment -matching -##inescence -connectivity -vap -##anium -additive -##roplasty -fertility -policies -unst -evol -##±0 -crop -circadian -polarization -d1 -ureter -steroids -##oides -restoration -mscs -conscious -##lot -methanol -transfection -helix -##ivation -fsh -1995 -h2o2 -afford -hous -supernat -hyster -bioavail -##arum -conditioned -microorganisms -pollution -cohorts -nitro -statin -conjugated -labelled -##mal -blotting -##ger -vertebral -c57bl -upstream -mechanistic -130 -generating -progenitor -private -14c -##atonin -alignment -##oscopic -stratified -##atch -september -moderately -relief -##ographical -noninvasive -continuously -anticancer -clinics -threatening -formulations -##thromb -colitis -fistula -hydrog -verbal -alle -reticulum -tk -tone -translational -breeding -persistence -look -promotion -##oric -##f1 -##ionine -##iton -transmembrane -otherwise -principle -##ophore -##ped -glial -viscer -asian -che -bronchial -station -nsclc -##va -##tism -septic -scoring -october -conservation -crypt -##ubicin -scaven -c2 -employing -anatomic -sand -mapk -chemicals -adopted -##afts -neurologic -##3k -##utable -##rontal -ineff -d2 -mex -##key -herpes -opin -ont -seventy -mandibular -##orted -##adren -cigarette -behavioural -dent -ionization -minimally -calibration -copy -##axel -1996 -informed -##ulator -##t3 -medline -australia -palli -usage -interpreted -laborator -market -routin -##requ -cool -canine -##ulos -transmitted -renin -administr -pubmed -##onom -##rex -phenomena -lasting -l1 -ejection -aspiration -subp -resc -outl -endemic -popular -horizontal -catar -river -warm -charged -axons -stabilization -attractive -surfactant -herb -##otoxic -kappab -ecm -##olate -influencing -ganglion -discrep -ratings -alanine -cyclo -##obut -flat -df -##urgical -##olym -photon -simpl -tracking -perox -discrete -##amines -inpatient -trypt -cytosolic -##34 -arising -##mut -orbital -reaching -picture -##factor -antiviral -yl -neuropath -sig -recons -immature -entr -coa -rg -apply -##asm -melatonin -manipulation -sent -epiderm -tibial -mouth -antif -fp -asymmetric -sirna -tet -victim -neurob -dispersion -meet -antibacterial -insufficiency -enabled -penic -lupus -provision -moment -##ounding -##elect -notably -##pert -chd -1h -neuropathy -va -accompl -ole -reserv -##hl -devi -causal -inoculated -adsorb -meat -hypothalamic -##opos -cyclin -pip -##puts -observe -lipopolys -efflux -designated -##igation -##mm -105 -nb -ccr -##esare -suspension -laboratories -##graphy -multivariable -friend -lesser -amelior -pestic -##ffe -triple -ebv -gro -tsh -distric -episode -aph -objects -fn -raman -select -oriented -##ostatin -magnesium -rd -gyr -##osel -##ogenes -##tia -user -modest -definitive -evaluations -recipient -osteoar -normalized -##orative -erk -immunoreactive -##organic -##unk -rule -latent -##osarcoma -palliative -assayed -iu -cellulose -anticip -india -financial -imped -radicals -oxidized -sequenced -##letes -inversely -giving -cab -synov -scler -representation -revision -diarrhea -nal -hh -thyro -sg -photosynth -pleural -novo -pseudo -desc -##esarean -underl -##patients -incons -##arct -reproducibility -astrocytes -diaph -attributable -criterion -dysplasia -densities -guan -attenuation -forest -##aturated -granules -##asts -##tra -peroxidation -pari -aggregates -gst -adding -owing -##recip -##cy -##rag -nation -##met -hsv -directions -##ayers -erg -extreme -calculate -standing -lamb -covari -incision -##epith -haemat -grafting -uncertainty -##t1 -intermedi -micron -arise -##f2 -##yrin -cfu -releasing -gad -monkeys -##icul -##ecs -intermittent -recognize -validate -plasmin -fetuses -powder -##ito -compatible -destruction -establishment -migraine -##iatal -transplanted -leukocytes -neoplasms -becoming -disturbance -foreign -code -immobilized -deprivation -g1 -##static -avoidance -lumen -consistency -##ards -16s -grav -rac -cranial -##terase -nucleotides -hypoxic -hemodialysis -articular -dcs -##agement -sac -postpart -pigment -caffe -inserted -mathematical -striking -autonomic -incid -perinatal -conductance -converted -evolved -anesthetic -##tructural -contrary -unable -##enteric -fragmentation -cholec -kr -exert -postpartum -fecal -analgesia -##avir -arach -ambient -candida -##aly -##urable -##p3 -##thral -deterioration -vibr -##otherapeutic -kerat -condens -aneurysms -upregulation -abilities -transitions -discl -losses -massive -##oblastoma -suffered -prove -universal -ethical -remarkably -mas -160 -conjunction -amn -replaced -joints -bacillus -proce -##ogeneic -quantity -apprec -crani -shed -echo -##n1 -marginal -##ensitivity -conditioning -seasonal -switch -sampled -erythrocyte -dial -believe -athletes -c1 -abnormality -##uretic -##uck -constitute -##edical -alumin -cooper -##aryng -ome -##no -hosts -arc -parti -##g2 -##ova -##points -arabid -sharp -fdg -inver -##panic -penetration -alcoholic -multim -modify -trem -##67 -pharmacokinetics -##uric -csa -tbi -##but -inventory -din -radiographs -ligation -##enth -conceptual -correctly -lib -arabidopsis -cochle -drawn -lectin -spi -inorganic -##romycin -myocardium -##virus -historical -ded -microp -running -##oemb -dysp -lysine -thorough -##imb -cognition -describing -belonging -preclin -##usp -til -held -specialized -usa -striat -spectroscopic -##itors -knockdown -stra -flexion -masses -peer -autoantib -##olid -##omerase -filled -coil -expend -##zation -cannab -##anger -hif -immunob -mediating -polyc -proportions -wnt -ambulatory -catalyst -##uz -tyr -##bilical -##urnal -##floxacin -##plastic -avi -reward -equipment -##iliary -aspartate -developments -##antom -fibrinogen -##c1 -unstable -ingestion -tam -##essive -elective -##r1 -sy -gained -sarcoma -glu -##imus -glycine -preclinical -segreg -disulf -etc -mini -peroxide -splen -unp -##itated -##ares -envel -conversely -job -##luor -forced -facilitated -ovary -nephr -brown -phospholipid -eighty -##ifug -expendit -##gene -##qol -contractile -ih -giant -hbs -##pin -##ostomy -autophagy -cros -##olum -trigger -similarities -autops -swed -##p2 -vestib -interacting -##sk -lithium -##rospinal -1994 -resusc -neighb -scin -contacts -111 -##lipid -reproducible -takes -##ellum -eleven -heme -1a -fel -commercially -subsets -terminus -##entan -inoculation -vc -clustering -august -attempted -bott -contractions -trypsin -courses -slower -106 -fluids -name -visualization -essentially -sial -ai -mdr -colonization -visceral -soils -##enstr -##ozoa -aware -##after -800 -testis -aberrant -hypotheses -capsule -acth -##bumin -incontin -##itability -fibres -radiol -##ibilities -noradren -prophylactic -introduce -##iefs -cholinergic -immunosuppressive -balb -sole -batter -bioavailability -poorer -inconsist -##ectomized -phosphorus -##atation -125i -rings -unlabelled -bh -constructs -receiver -resol -slope -institutions -carry -knockout -overlap -fair -lipopolysaccharide -cerebrospinal -methodological -asd -postmenopausal -facult -fluctuations -gnrh -reflects -immunosorbent -glycogen -advanc -doctors -450 -##rupt -##imetric -##48 -##uding -##ecan -leakage -##aldehyde -chim -smoke -routinely -varies -pulses -institutional -autosomal -precipitation -##95 -revascular -transverse -1β -##ogenicity -##acing -subpop -##irable -##illi -jejun -h1 -irrevers -beliefs -chromatographic -moiety -##hn -explicit -anten -defective -##oxif -modulating -##ony -##oblastic -##pc -##odium -tachyc -thr -phantom -establishing -atherosclerotic -ethylene -cauc -liposomes -##epinephrine -photoc -vl -template -chit -limitation -summarize -##ilon -insect -oncology -distances -##38 -bw -agar -fermentation -exploration -##onduc -wastewater -canada -umbilical -encodes -kinds -##inance -droplet -cephal -homogen -gv -##odg -energies -##etamine -responding -norepinephrine -promise -dramatic -entity -spot -allevi -deplet -##bean -responders -##angements -eliminated -transforming -integrin -##benz -cosm -##hed -missing -daw -dut -genital -porous -disl -explanation -pad -slices -attending -arb -moving -intention -##nd -##regular -quanti -warranted -ccl -machine -nav -juvenile -pup -distinguished -gt -mention -los -ea -##eled -highlighted -deline -glycerol -surgically -parietal -phage -meg -##rolif -##lit -##psych -##75 -##utin -relatives -reproduction -##tified -caucas -intraperitoneal -##ronectin -##opia -corn -slowly -pth -irregular -wil -blockers -geographic -evidenced -##oprecip -fluoride -##omegal -##ura -seeds -2a -coex -caries -wounds -partition -dg -reflecting -resident -preferentially -determinant -interpret -yell -apart -averaged -ipsilateral -cascade -myosin -##ulsive -gfp -periodic -asper -west -electrocardi -biomedical -ammonium -monocyte -anaesthesia -##atosis -##idazole -allergy -regarded -attend -##ius -##allow -drink -apo -prone -##39 -##ygd -##entists -ses -##ocompatibility -caroten -ectopic -006 -cataract -##thood -##ble -prolactin -nig -maxillary -scaffolds -minimize -##osidase -##orubicin -analgesic -corp -pollut -dopaminergic -pci -plex -submitted -leu -adulthood -obtaining -endometri -##flur -isoform -##itig -##atr -##rowth -neuroprot -tick -aryl -##ilar -manage -psoriasis -scaffold -regulators -hydrocarb -##izumab -allogeneic -propr -wear -lymphatic -trna -##acil -amygd -##ylic -allo -cardiomyopathy -##oconstr -drive -##environ -poli -optimize -counseling -##ococci -apparatus -##vem -ibd -##ida -keep -##rof -affective -augmented -##ophan -##omyc -101 -mitogen -ganglia -schedule -titers -catenin -catalys -osteoc -prefrontal -toc -folding -extremity -comorbidities -codon -saliva -cot -mimic -tin -##eces -##45 -multidisciplinary -##amer -1980 -prescrib -proc -consumed -household -respiration -ulcers -poison -dsm -##oreg -mesh -cadh -plans -glycol -trap -cycling -iodine -estimating -##ele -sensors -termed -survived -mycobacterium -flexibility -##oir -named -effusion -accordingly -##epithelial -carcinogenesis -century -prompt -##inder -integral -finite -ascorb -starch -striatum -##ishman -##ensitive -preferences -silencing -##psychotic -##acrylamide -##onomy -##uled -parathyroid -participating -yellow -naf -##roid -endoscopy -leaders -eukaryotic -d3 -repetitive -piv -prosthesis -incontinence -transc -homozygous -cyp2 -hypothalamus -automatic -mrsa -osteoarthritis -volunt -mk -##cm -lid -##opress -polymerization -chiral -damaged -sensitization -triglyceride -##nk -irresp -##apine -veins -intraocular -weakness -vom -lowered -##pi -surprisingly -##rogens -heating -dismut -fabricated -rose -##ocept -virtual -coherence -104 -stranded -emissions -fus -solubility -neurodegenerative -confined -constraints -continues -##onyl -adjunc -connection -follicles -circumst -demands -cust -microenviron -played -government -primers -mosqu -##inates -gingival -trimester -cac -##uvate -##enyl -disad -##63 -probable -circuit -thereafter -facility -delt -streptococcus -hba1 -biologically -permit -glioma -##oplastic -denat -card -##uing -iop -physiologic -confocal -detach -##aver -vulnerable -perh -rationale -gol -##otrophic -intens -macular -lengths -##mium -1992 -##uli -##unctiv -fruct -1993 -disadvant -ammonia -##ectic -mism -desired -splicing -terr -copies -influx -##iffer -##opus -electrophysiological -##ubin -acceptance -arthroplasty -swim -microbiota -typh -##aid -angles -duodenal -cip -flavon -aller -##ola -infarct -nick -##yclic -##affin -##acyl -cardiopulmonary -bolus -mcf -##42 -ko -123 -switching -adrenocept -bacterium -corticosteroids -classic -##iosis -fev -aeros -flight -dioxide -##99 -stabilized -catalase -structurally -antipsychotic -flank -profiling -cyclase -108 -complaints -##ecyl -shunt -proinflammatory -sulfur -themes -eventually -allograft -##hep -ll -consent -nucleic -pod -##01 -trab -novem -wean -centres -immunotherapy -anastomosis -##ivity -paed -genetics -east -irrespective -uro -perhaps -chemot -winter -legal -rif -antigenic -##arboxyl -dod -america -presumably -##imp -##echanical -metric -pollen -##pyr -103 -fertilization -c4 -propagation -diaz -##psia -november -anthr -undet -seemed -horses -microvascular -cochrane -sprague -nac -assembled -dawley -reservoir -##mitter -##55 -percept -##brain -sized -tachycardia -fourier -gynec -mercury -window -mind -##aker -microl -filling -brady -##omial -engagement -boundary -epitopes -perfused -metalloprotein -leishman -##olet -today -get -helical -thrombocyt -bb -excitatory -stain -hcg -acup -plaques -hyperp -planned -detector -mosquito -##abine -bacteri -waters -trunk -##inting -slud -##arium -remote -##diagn -senes -placenta -helic -hydrochlor -reversal -hodg -##osites -102 -multil -incorporating -##uclear -ninety -##odegrad -accounting -apoe -ethnicity -##o4 -axonal -nephropathy -dismutase -papillary -phenolic -progressively -angina -anticoagul -colonies -persisted -cd34 -##asi -abc -ubiquitin -ppar -##a2 -hazards -roc -committe -sag -zones -##ettes -007 -considerations -kh -fulf -##ophyll -straight -modif -##dd -crystalline -triglycerides -organizations -internet -motivation -pretreated -germany -inherited -binary -nanom -##glycer -scored -cadherin -##icillin -m1 -epitope -productivity -febr -accessible -salv -tai -dest -autism -##hypert -##otropin -splenic -arrays -quadr -zebr -hodgkin -##ielding -dair -epidural -instruction -##ince -mitotic -spherical -aβ -judg -##yroidism -medias -##enzyme -inhalation -plates -gray -hba1c -##abolic -wm -adhesive -f1 -vomiting -##adj -vocal -##ozo -subd -symmetry -hi -psychiat -##ografts -biologic -microv -clinicopath -transferase -multicenter -blocker -spent -filament -views -##retro -fifteen -hygi -##amph -earth -##iac -tradition -chitosan -repeats -papers -fibronectin -prostatic -caffeine -##olding -##irubin -spir -immunized -microd -antiretro -render -february -compan -bull -perspectives -rhiz -admissions -limbs -lb -septal -rely -serological -dissolved -tryptophan -suture -##opter -##arial -hyal -prevents -abl -##roliferative -##monic -acs -organized -positivity -tro -paral -messeng -##itten -challenged -briefly -elongation -wood -fate -cdc -asperg -inapp -ka -ago -carefully -rhod -overlapping -engineered -##lyl -advance -menstr -intended -arachid -##lementing -##omod -idea -islet -malformations -nonspecific -prl -analogs -##itudes -walls -##mission -immunost -##borns -addressing -hyperglyc -pulp -##ropri -experts -eastern -tol -##ortive -pulsed -phon -1991 -isoth -prednis -nadph -008 -san -individually -coordinated -excited -mitig -turb -scenarios -tea -summer -dairy -cris -curative -viscosity -cough -omega -collectively -cadmium -arms -tong -tpa -titr -##flurane -##pe -big -##ochemically -hered -##tron -curric -retrieval -sludge -lineages -defining -differs -accid -twin -stream -##obs -circumference -personnel -thymus -motifs -remove -##x2 -split -ie -spind -vasopress -cerebellum -##icans -ff -##otroph -inappropri -conflict -##tir -endoplasmic -mesenteric -immunoblot -p1 -impairments -designs -##onazole -vt -##parts -analyse -extrap -intravenously -oz -pdgf -##ply -constituents -newborns -gem -reasonable -bioactive -##lements -resorption -depleted -emg -ophthalm -fh -exhibiting -programmes -thromboemb -dermal -##inge -##rogenic -aor -cyclospor -cholecyst -methionine -hierarchical -##entary -lands -autopsy -haplotype -##icidal -indian -links -myeloma -faculty -##amil -g2 -intermediates -naph -expectations -##eria -##66 -gels -ice -australian -vig -dynam -##ardial -fused -##onym -mpa -categorized -trees -nitros -accounts -rigid -##arse -glucagon -ultravi -nutrients -ultraviolet -resected -mentioned -mang -pals -kept -conductivity -##bow -matric -##ucl -ment -##exin -dust -explor -gaps -resuscitation -familiar -bases -doxorubicin -##fluor -gf -spite -antifung -##fish -##obarb -ribosomal -spring -calif -##edema -revascularization -heterozygous -dead -cerevis -semic -killed -quantities -arsenic -chr -disappeared -inherent -manifestation -counterparts -polyacrylamide -synovial -relating -spermatozoa -immunocyt -notch -cea -hypoglyc -compartments -nemat -lives -monolayer -handling -interviewed -##thy -pec -cationic -##b2 -lactam -vast -trabec -xanth -immunoprecip -bmp -descending -##hal -cerevisiae -intubation -chemically -stresses -infertility -falls -##obacteria -antiretroviral -pmol -isotope -connective -anesthetized -##amel -strongest -sacch -fractional -contract -hypersensitivity -foci -##glutin -occasion -stationary -meaningful -cit -lucifer -lean -##osides -electrostatic -publications -suggestive -icd -killing -predicts -publ -theories -##iers -lign -solar -##obac -laryngeal -##epam -empt -endotoxin -##aved -thermodynamic -cub -pge2 -built -##idic -131 -fmri -mixing -##mv -preserv -##erated -differing -competence -collaboration -discharged -concentrated -pah -dv -planar -grain -voluntary -##hepatic -vestibular -tot -##eption -plasminogen -circumstances -##amb -stone -branches -titanium -promoters -copolym -oxal -rain -mating -kary -summarizes -glucocorticoid -##eries -leucine -saturated -disulfide -island -inputs -pw -enabling -began -synaps -chlam -##aching -##odontic -##enal -converg -tracheal -feel -##stem -107 -shortening -irreversible -nause -hydroxylase -##cein -reacted -mot -##fil -##88 -ultrastructural -decomposition -##47 -hemisphere -kl -##59 -coast -eros -##ographically -symb -junctions -##ife -plasmids -myelin -build -##ocycl -tof -##ophosphate -causative -##ented -##amino -nanotub -gyrus -centered -bdnf -lambda -pacing -shifted -demonstration -illustrated -lf -seal -##tracted -sexes -sexually -province -nausea -aeti -employment -ae -th1 -americans -##ift -polymorphic -annot -hypotherm -provider -##iding -nom -computerized -passage -rhyth -soc -sah -eigh -macrom -imper -californ -##ously -teleph -cyl -##itance -centrifug -##onit -larval -facilitates -scinti -##izer -cm2 -datasets -##oate -##onv -innovative -##agglutin -cations -##endocrine -dermatitis -biotin -##feeding -foss -##tium -##x1 -##acycline -neuropsychological -silicon -##ifferenti -109 -page -thymidine -perceptual -##olateral -applicability -##obenz -bath -managing -paediatric -endovascular -##orh -gelatin -stimulates -projection -colour -##flav -transporters -exten -retardation -retrograde -##amily -embase -dihydroxy -actively -suffer -dib -hispanic -antidepressant -connections -modelling -arrhythmias -##ocking -abortion -clonal -##32 -mapped -dissolution -sufficiently -##azone -butyl -gradients -proteolytic -##opr -microsph -orders -albicans -volatile -retinopathy -chip -mediator -executive -nic -1beta -crosso -precon -rbc -envelope -geometric -converting -##ience -pi3k -vo2 -reinforc -endpoint -batch -written -ww -minority -ri -aldosterone -fibre -prp -allergen -deletions -elast -bup -diaphrag -oscillations -calcification -##phasic -hernia -preoperatively -##68 -mucin -agricultural -##dh -##tigm -##wan -##asal -relate -maln -phil -reciproc -carp -efficac -vill -surveyed -scc -moun -bsa -##romagnetic -supportive -##olipid -calor -topic -radius -##gic -hereditary -characterised -percentages -##sc -habits -subcellular -cytology -bones -meningitis -behind -##o3 -dataset -amygdala -burst -hypotension -osmotic -atl -esters -##unting -##econd -serves -termination -##ritin -oocyte -consuming -lic -rho -diagnose -virtually -comorbidity -acl -antihypert -haemorrh -solvents -asymmetry -##oler -firing -cytogenetic -lived -unlik -##oac -cutoff -ami -participant -grass -##ido -methoxy -##entric -##idone -guideline -attacks -porph -antifungal -##44 -##mcs -quickly -unlikely -sports -continuing -restr -##traction -flag -choline -hemorrhagic -##fc -matrices -players -electrophore -propensity -als -degenerative -indicative -fifth -##ipr -imbal -##amidal -rules -consultation -neuromuscular -##erate -ber -faecal -epileptic -##ineal -##onectin -brains -glucuron -seconds -underestim -enamel -##rug -mcp -ordered -fv -##oxicity -extin -lattice -phospholipids -micromol -soybean -clinician -extrac -lew -axon -renew -sacrific -retr -##utrition -accomplished -intellig -cloning -boy -adenoma -aquatic -##rotomy -ldh -modifying -cerebro -autoantibodies -trypan -pock -tropical -fibrous -aux -t2dm -##ontrol -confirming -cleft -b1 -##afish -##berg -puber -luminal -##inical -french -beta1 -citrate -microw -##odic -zebrafish -##ocaine -##version -microenvironment -sedation -dysreg -biodegrad -th2 -ngf -endocard -adequately -contempor -indometh -rip -exceeded -hela -##arynx -cone -trif -##33 -dominated -bridge -epsilon -##fo -follicle -fulfill -##h2 -tongue -radioimmun -µm -publication -##while -kcal -##eties -reconstructed -verify -sharing -conjugate -bax -skull -asthmatic -demographics -indomethacin -##opre -telephone -phospholipase -pell -district -##ondyl -apt -displays -semen -vesicle -tio2 -clock -succin -oligos -revised -reagents -crf -dichlor -##cd -fire -enlarg -crossover -sporadic -adenovirus -glutamine -spik -thous -deposits -priming -intensities -##asome -screw -impedance -##tification -opinion -##ersion -115 -office -grades -match -dilatation -intron -curv -osa -infused -##tructures -her2 -ew -##obulin -mumol -embolization -atmospheric -##opyran -##alin -nicotin -behaviours -creating -adenomas -hydrophilic -##atinum -bayes -sput -perp -##omyel -exha -investigators -apolip -##plantation -##oms -falc -thiol -anova -electrolyte -segmental -##tituted -retrieved -absorbed -activates -calves -nodal -amine -##ovir -expand -##ois -committee -fourteen -vaccinated -troph -preceding -##inform -isomer -deox -scope -confound -scaling -##xa -immigr -multiplex -inappropriate -##onn -sigma -foundation -##ohex -imag -vasoconstr -covering -inactivated -stepwise -apnea -mediates -ends -apolipoprotein -exploratory -augmentation -definite -crohn -pyruvate -departments -posit -##romyces -##rost -superv -weakly -bv -messenger -filaments -primer -morning -ineffective -sequel -debate -ecl -palsy -neuroendocrine -painful -divergence -palm -1988 -perforation -##ophosphamide -cck -aq -p21 -oxygenation -faces -accommod -cooling -monthly -##oglycan -curc -geographical -islets -golgi -##atics -1989 -ling -corpus -amput -luciferase -polycl -##m1 -##abd -inhaled -neighbor -constitutive -ord -tumorigen -oxyt -multif -interfere -graded -downregulation -whil -recessive -temporary -burs -asia -confirms -actually -##oal -epine -folate -##role -alp -singlet -concerned -traffic -epic -arrangement -whilst -hands -locomotor -##anolol -mobilization -##urgery -hamster -livers -tors -compact -away -gather -blinded -paclit -prosthetic -adsorbed -antihypertensive -exerted -breastfeeding -stimulatory -degraded -n2 -##inted -##istrib -paclitaxel -##ulas -pathophysiological -habitat -possibilities -acceptor -discriminate -lipase -schw -ctl -quench -transcribed -terminals -eliminate -selenium -##omed -vanc -expenditure -urgent -nanoparticle -cochlear -bn -surrog -monotherapy -reflection -derive -adiponectin -cpg -angioplasty -##etrical -fum -mun -##uge -bench -##d1 -tubes -substitu -salts -cast -##uloskeletal -immunocomp -typing -sphing -rum -##otom -lysis -extending -radioactivity -genotyping -regurg -heavi -137 -##plicate -segmentation -deformation -penicillin -learned -duplex -sug -maize -relig -##afil -bilirubin -sbp -initiate -mutagenesis -polyethylene -lymphomas -atopic -##aromyces -projections -reagent -somewh -embolism -##vation -physic -brainstem -natri -confounding -##yel -##osper -elicit -cyclophosphamide -microsat -expiratory -entered -nonp -nif -percenti -accordance -reliably -strengths -bus -abd -##asp -dan -cesarean -journal -ether -gent -##ificant -musculoskeletal -acin -##orbol -diameters -##uterine -chemokine -##itating -taiwan -afferent -conjunctiv -ferritin -h3 -emphasize -waist -king -mec -capabilities -mds -unex -chance -##ofol -##orac -claims -##ompass -sided -epidemiologic -transpl -helper -##esophageal -priority -##inos -vasopressin -regime -differently -##oreceptor -deviations -allocated -128 -nail -encompass -bim -noct -mutated -##incter -vulnerability -mtt -immobilization -##anin -sixteen -##mann -c5 -synapses -##ka -ubiquit -striatal -interfering -lavage -rodent -intramuscular -stents -##otopic -lactic -##ading -##k2 -spike -sensitized -pivotal -##oprotective -##alanine -breakdown -loads -monomer -##ilis -##ona -##36 -platinum -##ethanol -propranolol -deposited -inconsistent -##ignificant -nitrite -##titude -009 -adolescence -##aplan -129 -balanced -battery -immunosuppression -erythemat -seropos -noradrenaline -##ulators -##aterials -inositol -taxa -##ete -catheters -uterus -##ussian -hmg -soon -esophagus -ultrasonic -240 -##odend -negligible -outbreaks -clot -proteinuria -##ipped -tracer -rm -##aminase -thrombus -##otechn -##77 -unres -sut -pathologies -thrombocytopenia -originally -##agr -cytosol -morbid -fluoro -kaplan -latin -µg -agreed -confirmation -summarized -granule -microglia -##osum -tear -varic -forear -mtdna -sector -##odi -electrophoretic -plot -dendr -polyclonal -iran -exponential -##artan -icp -california -##ushing -##ney -stratification -exec -somewhat -experiencing -successive -antip -totally -benzene -##traum -dehydr -taste -dried -grouped -##aced -medicare -operational -h2o -##coplas -vertebrate -warf -veterans -dissemination -duc -occip -doped -2b -##ophysical -rcts -chart -##gluc -positron -plexus -##etine -apc -argue -thirteen -##urium -diver -aerosol -dpp -extinc -##lutin -##opropyl -disabilities -antisense -##astric -avian -biosens -purs -##nes -immunomod -avoided -births -humoral -##lipidemia -##zymes -##000 -propofol -femur -heterozyg -##illa -menstrual -urolog -##ods -specialist -syr -particulate -unfav -spindle -diagnosing -downregulated -multidrug -##oney -##46 -safely -hop -ee -hrqol -116 -canadian -antioxidants -endometriosis -prescribing -##urons -fabrication -##hex -traditionally -118 -chronically -mg2 -##ersonal -vitre -##ocyst -refractive -vw -assistance -detrim -scd -nir -##keleton -##place -stack -vapor -harvested -##ittal -schist -dft -positioning -nct -fitting -register -gradual -peroxis -facilitating -##oti -acupuncture -##emporal -sacc -conducting -wider -phospho -lysosomal -medicinal -cerebrovascular -jnk -indirectly -differentiating -##enem -immunoassay -gps -invari -chlamyd -cosmetic -agglutin -ok -rodents -##estr -gallbl -c6 -aggression -hold -altering -##31 -periodon -hyperglycemia -spatially -dysph -immunostaining -##urin -epinephrine -raise -##electro -cow -custom -contemporary -italy -attributes -chap -##opathology -hope -caudal -##idol -viewed -inequ -interventional -zeal -##onium -##timal -attitude -come -board -killer -nocic -sor -travel -##ozyme -discontinuation -fructose -135 -##oxidase -carbox -hole -crt -##anic -borne -multis -table -keratinocytes -pma -sheet -phi -synthet -##glycerol -preec -lactation -beads -conjugates -bring -union -physi -qp -meier -##utely -##nary -interd -intrap -notion -clients -korean -##bone -genera -restore -##odiaz -paf -biod -deformity -fungus -pyrid -##imaging -##ofacial -##amma -disrupted -##thrombin -routes -##acaine -##yx -somatostatin -zealand -##vastatin -medicines -##orated -chickens -mirr -covalent -normot -racial -sediments -acceleration -wrist -falcip -neutrop -hap -##bred -mtor -1b -moist -deterg -fb -hygiene -hypog -deploy -##oea -126 -neuroprotective -dq -spots -originating -bromide -glycosylation -depolarization -semantic -unn -unexpl -pollutants -county -##xr -ingr -lymphaden -stones -intercellular -pmn -voc -fen -edta -##ita -e1 -chemotherapeutic -##formin -2018 -senescence -##odem -amongst -granular -fitted -entrop -neutralizing -##ecific -exerts -##urea -cuff -##onitrile -angiogenic -implementing -b2 -margins -##rexate -##rio -morphologic -dentin -codes -piper -proteases -cgmp -##aran -train -accident -neoplasia -interacts -sphincter -brought -inversion -endpoints -timely -##ylase -1987 -##fall -##oxane -scenario -military -suicidal -##olecules -##ustion -desirable -floor -iqr -##atectomy -##atite -addiction -digestive -worm -rearrangement -##erum -designing -polysaccharide -aminotransferase -ultra -stretch -falciparum -dextr -##inesterase -creatine -convenient -incremental -hn -hcl -overload -trache -##osamine -intrauterine -revealing -##pation -erad -112 -compromised -##adm -##98 -diffus -##rophot -solely -prepare -simply -couples -poisoning -functionalized -presentations -gallbladder -calf -asa -src -hydrogel -ran -yielding -assumptions -flaps -psychosis -mag -extinction -##ishing -aberrations -acetic -ik -linearly -ingred -neuroblast -##onuclease -##arinic -##bir -postin -kil -visualized -functionality -proof -##ropo -##urations -hes -desorption -pbs -kv -##growth -oce -drivers -pharmacy -fractionation -endoc -nickel -judged -sheath -hepatocyte -intracere -repress -patency -harmful -lock -rotational -##opher -efficacious -##lv -##mo -grading -bank -transcriptase -carbonyl -curriculum -arachidonic -proline -neoplasm -biomechanical -compensation -dot -catheterization -cxcr -sav -caucasian -corticosteroid -disseminated -##ortical -magnet -contribut -origins -appl -toxins -recurrences -initiating -france -tubules -##otrexate -mtx -forearm -##omethyl -polymeric -##hyd -tailored -unid -hydrochloride -nn -entirely -monte -washing -1α -##ationally -plots -bioinform -sequelae -β1 -##ecess -overexpressed -echocardiographic -trajector -benzodiaz -metformin -iliac -diagnostics -organizational -haplotypes -##umb -angular -trajectories -incomp -transpar -physicochemical -methan -modeled -tat -stock -polypeptides -york -dens -##grav -tubulin -comfort -1985 -##opharyngeal -##gd -creation -anter -enhancer -nich -rescue -histopathology -pfs -subf -oligonucleotide -possesses -methotrexate -vancomycin -##elia -##iate -bayesian -insensitive -bilayer -serotype -manifested -titer -##atric -dodecyl -spanish -covariates -deleter -##iab -##isely -intrath -smad -geometr -lake -diphenyl -histopathologic -##agia -plur -##wards -170 -proximity -##iently -beds -postsynaptic -lactobac -cue -paraffin -deleterious -145 -radiolab -##ipping -##ldl -microsomal -generic -amide -insects -ceramic -die -distinctive -specified -identifies -coexist -##orin -ics -icam -parenteral -funding -radioimmuno -freezing -##iesis -ascending -airways -tx -saccharomyces -spreading -heterologous -##otent -selectin -electros -kinematic -encode -ivf -aga -##amen -fruits -counting -warfarin -##2b -meanwhile -hbsag -bundle -exons -bios -gains -spectrophot -enlargement -pacem -circuits -disordered -aki -excluding -rays -postulated -curcumin -##aterally -eradication -m3 -avoiding -endothelin -detrimental -supplements -hct -ascites -##yrene -prev -broader -plateau -cobal -##je -deng -buc -##amed -##ican -discom -amplitudes -forens -definitions -weigh -pars -expensive -instances -microwave -dilated -imply -##meth -collaborative -cytoskeleton -pdt -benzyl -microspheres -permits -homes -pesticides -elab -salvage -##istribution -##ipt -##otropy -schwann -paid -voice -adducts -regurgitation -delays -##kinin -tons -consolid -felt -phenol -##oracic -realistic -contexts -tablets -##ibrill -intakes -discipl -occasionally -##oreal -supernatant -misc -erythropo -hypertherm -##58 -##flex -catalysts -implies -stroma -abdomen -documentation -disparities -advers -plasmodium -lipoproteins -insectic -trade -carboxy -ischaemic -laparotomy -##brids -regularly -recycl -equival -microsomes -pas -venom -cbf -alternatives -advice -carbam -radioactive -excised -114 -develops -bcr -photosynthetic -era -corresponded -robotic -oval -dengue -euth -gag -pgf -interactive -gfr -##vac -fuc -##atalytic -aur -##rep -vip -happ -girl -concordance -isomers -zno -energe -interm -pai -works -helps -assembl -nude -sciences -##ya -adoption -odor -tv -##oneph -1986 -borderline -carlo -microtubule -accumulate -lamina -axillary -metallic -##inin -##yrate -adipocytes -subtil -freed -##ocysteine -##infected -epidermis -ecosystem -topics -accessory -inability -##ofen -##umental -sna -quer -fev1 -arous -117 -##osensory -allerg -ductal -corticosterone -##chol -glycoproteins -intrad -##head -remed -selecting -backbone -##rolimus -antiserum -posttrans -dha -allocation -rock -##n2 -natriuretic -##agg -captured -pka -cadaver -##itabine -aspergillus -discomfort -sagittal -mets -http -failures -infusions -cholangi -crossl -synthesize -style -nap -interpl -##odyn -rej -constric -swine -probiotic -vm -pcs -stoch -##li -ances -projects -##oxifen -##pd -infrequ -msc -opt -##inyl -illnesses -2h -mca -normalization -glycemic -ascertain -trafficking -residence -amylase -entities -depended -len -sputum -decompression -##oking -therapeutics -doctor -bid -constitutes -##ivacaine -##imes -subtle -##tiles -##rist -chemo -gw -##nam -ix -subclinical -format -unnecess -myocytes -metaph -expanding -deduced -suspic -amorph -anticoagulation -ageing -brca -malnutrition -aggregate -postural -representations -micelles -resultant -cleaved -chimeric -ja -relies -eighteen -elbow -pharmacologic -cooperative -phagocytosis -##78 -a5 -##omyelitis -spain -##ua -vibration -precisely -orthop -subtilis -exploring -fluorescein -bottom -forensic -mabs -sociodem -psychotic -regenerative -security -choices -physically -willing -##ichi -outflow -124 -filters -calmod -cup -phorbol -ash -350 -##ximab -bill -synthetase -##aind -##eptidase -##atization -reserve -docking -##ativity -simulate -programmed -oxytocin -jaw -attended -##cell -paradox -##respons -mci -utilize -chondrocytes -allelic -detachment -rcc -periodontitis -##olol -polarity -parenchym -encapsulated -##adh -##ocysts -sedentary -histidine -teams -existed -##iest -parasitic -hydration -neuroimaging -urethral -analytes -progenitors -uncover -module -gated -iter -##inery -stool -polarized -210 -anions -necessarily -removing -##quine -quasi -##anz -semiconduc -ign -properly -pcos -patches -calmodulin -##ky -bcg -hyperch -tomato -sociodemographic -escape -hyperactivity -##acrylate -competing -bruc -substitutions -##ullary -##ulsions -bms -##edge -thickening -##thetic -sides -rl -polyps -oss -lymphocytic -##eremia -icc -hysterectomy -1alpha -underlie -trib -incorporate -afl -##eptic -monkey -crow -monolayers -1st -hv -nox -mosa -dextran -##oni -consumers -ulcerative -lidocaine -13c -geriatric -tocopher -##ulae -suspicion -oligodend -##abular -granulocyte -tumorigenesis -surviving -##eted -macroscopic -velocities -chf -solitary -title -isopro -190 -##noid -nocturnal -##eced -trail -##ietin -intran -##ordant -angiographic -bk -##itonin -rising -employees -igfbp -##rofloxacin -hepg2 -narr -##69 -##omere -boundaries -anticoagulant -ovari -bub -conditional -normotensive -##41 -##iated -disk -observer -malt -##vive -gg -duplication -cured -##trin -proliferating -encephalopathy -##inement -##elle -necessity -alive -pde -dependently -polyphen -italian -##graduate -scintigraphy -spray -dilation -hypothermia -biphasic -electromy -##iles -##onical -##cetin -pocket -adenine -preferential -microsatellite -tunnel -pon -##aa -terat -bact -cultivation -spondyl -transferrin -micrornas -adrenoceptor -##andial -stochastic -composites -emphasized -hrs -unfortun -qpcr -pyramidal -diethyl -modulates -vibrational -##irection -hybrids -outpatients -hat -##ball -##ophilus -anthropometric -stressed -imbalance -disappearance -intravascular -overt -victims -explores -muscarinic -contraceptive -##tise -theoretically -amniotic -attain -shapes -columns -tightly -##yz -cabg -##uan -performances -##mus -promp -satisfied -configurations -##anese -##hr -eosinophils -sto -cha -##lyca -detox -analogous -specialists -que -##ussis -swallow -interplay -ache -##akers -ethics -caution -##acrine -engaged -breaks -cass -##oactive -surrogate -tlr4 -rsv -##borne -##iii -##ievement -##urt -maze -f2 -stake -##amicin -teachers -instrumental -tuning -##asting -necrotic -labelling -##aign -enlarged -vasculature -##c2 -##62 -##treatment -occipital -babies -immunoprecipitation -113 -hpa -deficiencies -700 -whereby -integrating -##iguous -spaces -korea -neuroblastoma -##inositol -119 -ut -presumed -122 -rms -lenses -survive -##iders -illustrates -agency -thio -modulus -reactor -morphologically -tolerability -road -##ifier -scann -polyn -ease -##glut -conflicting -##lampsia -cooperation -##73 -microbiological -freedom -##onine -epo -##du -feb -dispersed -subpopulations -cryop -erk1 -##enoid -hypoglycemia -dislocation -palmit -vib -##kary -stat3 -##oke -observers -##alent -generates -hematoma -preeclampsia -aspar -achievement -nonh -nafld -campaign -drought -npy -compensatory -##arter -frail -alph -neurotransmitter -interpersonal -electrons -tamoxifen -##achlor -##osex -uric -synapt -gvhd -##omyces -erythematosus -pater -percentile -capill -intellect -weaning -vertebrates -glp -integrate -potentiation -##idym -procedural -cardiomyocytes -##umination -parenchyma -segregation -appra -chaper -nanow -ich -intoler -freeze -unnecessary -##jug -##quis -coordinate -amorphous -loops -siblings -rearrangements -corresponds -evolving -##itate -emt -ppi -adherent -intersp -disab -generations -rigorous -##ulins -##enoic -##ena -bright -mdd -transplants -atax -wire -##opathies -##eliac -##ourse -sma -##o1 -sox -##izable -ozone -##yric -##ece -##insulin -pyrim -##ocortical -reass -lncr -##osom -##band -##ml -twins -sulphate -surround -##arbonate -neovascular -##ectory -##obacterium -obstac -septum -mich -##list -attentional -methane -casein -pertussis -dyes -cxcl -indexes -##orax -##4a -##oidosis -landscape -mad -simplified -025 -bag -herbal -trajectory -##atology -scalp -scavenging -endurance -##ophilia -biosynthetic -radioimmunoassay -reciprocal -archae -nalox -hypertrophic -preceded -##imide -basement -smear -cw -perfect -oxidant -##icip -brachial -fly -formalin -qualitatively -favourable -uve -chapter -vegetables -approximation -rpe -technological -menisc -dimers -worsening -isop -midw -medullary -bending -##h1 -sj -scheduled -quantitation -acidosis -##igenous -pahs -psychometric -bonded -epididym -investigates -oxy -fuel -##ulent -sigm -sparing -intramolecular -extremities -galactose -##obi -quenching -##79 -operator -##ritis -incis -comprises -germination -##ystic -cyclooxygenase -##obarbital -talk -##itter -circulatory -jak -collateral -grew -exceeding -aluminum -fingerpr -libraries -stressors -hsp70 -statins -##ortem -triggers -##test -mosaic -introducing -##igenic -weaker -replace -haemoglobin -##ji -3p -juice -##d2 -##worm -suppressing -gonadotropin -roll -##ewise -amen -resolve -encouraging -uncontrol -newer -malformation -unfortunately -##otemporal -hsa -bony -tolerant -signature -proteomic -##okinase -##dis -##gestive -advoc -133 -answer -##agonal -addresses -##cg -##iterp -realized -antin -##0000 -highlighting -missed -contaminants -##51 -characterizing -lg -perturbation -hypercholesterol -crisis -toxicities -##onymous -cll -mechanics -obstetric -metrics -camera -sterile -##ander -##zz -##oselective -##ifferentiated -##tivities -invest -trust -atrop -vent -medulla -appropriately -jo -municip -hydroxyp -pears -ssc -dielectric -##rotid -naloxone -abrog -protecting -##ouracil -danger -##iline -waiting -nadh -cryopres -brush -insol -curvature -anisotropy -##otyping -ca1 -hern -joh -##orate -oils -vd -##arach -treg -accidents -cd44 -ky -clustered -##admill -ecc -##operoxidase -npc -neighborhood -palate -bad -consumer -sorption -##uran -treadmill -dedic -neuropathic -ftir -##ynchron -indigenous -political -patent -interfaces -chemokines -lept -von -##nm -##urc -heterod -thalass -unw -##marks -stores -arterio -theta -##amn -fid -5p -discrepancy -methodologies -alters -reinforcement -mediastinal -enteric -ubiquitous -colloidal -imid -cigarettes -england -##imension -##inities -retinoic -excitability -155 -homogeneity -workplace -ketamine -repression -##orable -vacuum -labour -formulated -clostr -determines -swimming -responsibility -vte -ascorbic -##72 -##grp -seedlings -nu -hypothyroidism -transcriptome -dystrophy -desens -condensation -##ositis -##andibular -hypothesize -discrimin -ahr -biofilms -##hol -arrhythmia -unev -brazilian -catalysis -mismatch -phenylalanine -methylene -##opy -illumin -ica -##oxia -distinguishing -nested -problematic -transit -##relin -##ughter -accompanying -orthodontic -##empl -engage -##orex -##therap -cd11 -compositions -ecosystems -parasit -clinicopathological -mandatory -##traumatic -xenopus -emo -s100 -laminin -##nitine -gentamicin -pha -allergens -radion -pem -verap -##osexual -##umps -##omavirus -##ends -silicone -conformations -##ictor -tolu -121 -cong -##icious -microflu -gastroenter -agree -desm -caregiver -##functional -horn -impression -microtubules -##ieties -verapamil -mrs -isometric -tetrahydro -##ophenyl -stopped -prototype -intellectual -droplets -##iled -exercises -drying -offic -adc -##tigma -dependency -machinery -suggestions -nanotubes -dedicated -legis -hematological -capacities -##1b -proteasome -discussions -progressed -divergent -ciprofloxacin -pearson -testes -lewis -retain -##epsin -scientists -antenatal -calls -##adjuvant -conce -mail -amputation -leadership -metalloproteinase -##ophylline -combine -scarce -centrifugation -reform -##genesis -anticipated -##ez -homocysteine -nodule -##odex -127 -astroc -sustainable -administrative -orn -managers -contractility -elemental -postpr -encourage -trav -##oxetine -seminal -regulations -entropy -rhinitis -transported -trabecular -ks -illumination -sorting -reoper -transiently -radiography -fda -##100 -pools -histories -electroencephal -aetiology -thaw -potentiated -perip -transgene -suspensions -antipl -suppressive -kir -valves -##anus -void -cpp -epi -vf -mesoth -efficiencies -flanking -ej -avp -packed -tuber -lysozyme -opposed -antidepressants -mexico -##aches -nont -calibr -##nar -hypo -laparoscopy -abeta -leaving -bicarbonate -spiral -##onephritis -arousal -##hi -simplex -updated -##emed -condom -puncture -radiologic -##amol -##ranean -ischaemia -attained -htlv -translated -microns -##quinone -##acheal -##acetic -plain -presynaptic -grating -##ialdehyde -hir -##m2 -##vix -suited -nether -prey -thymic -##iella -thalamus -periv -closer -supine -coagul -prere -federal -constitutively -ovulation -##ania -##develop -coded -titration -collecting -glomerul -##ogeny -broth -veterinary -master -anomaly -##itamin -ova -necro -contextual -dyspnea -evap -##esthetic -plurip -orbit -##ills -neutropenia -islands -rights -132 -colonoscopy -sugars -abstin -collect -interfacial -reperto -ghrelin -##omnia -##ocrit -opioids -analytic -helicobacter -eds -cannabis -canonical -packing -nanocomp -220 -practitioner -nephrectomy -imprin -pyro -compromise -congestive -purity -adenylate -ward -##ymethyl -##uinal -p16 -sonography -138 -sweden -ileum -##clim -rfl -effluent -##iber -splice -conve -clo -utr -measurable -stigma -undetectable -##rolase -##osoma -discharg -triaz -##ionate -aes -netherlands -dropp -h⋯ -thirds -commitment -stoichi -pbmc -c18 -dominance -aligned -pose -rid -elution -worksh -cylind -##zes -radionucl -mip -parametric -propyl -modules -obviously -##quar -antisera -breed -dysregulation -pom -qs -esi -possessed -strip -metap -extrav -portions -neuroc -mediter -##bach -slice -tonic -webs -lamp -schemes -manganese -immunologic -trigem -acutely -torque -enos -cobalt -approximate -146 -atmosphere -##bd -osteotomy -typhim -serotypes -satellite -cholecystectomy -##diagnosed -artifacts -soph -sweet -pups -leukemic -arbitr -##acs -suspended -wake -##itre -vasculitis -inexp -els -laryng -typhimurium -neoadjuvant -##irectional -expertise -moisture -amphetamine -protects -periphery -grafted -rdna -inheritance -huv -intelligence -anionic -oxo -sevent -mutual -overexpressing -hoc -##olation -undes -expectancy -##ucleotide -smart -habitats -##itum -enroll -assuming -accr -##120 -heavily -cel -shortened -nigr -detergent -sensation -emotions -##cales -mucus -cyclosporine -febrile -osteogenic -encephalitis -gov -sedimentation -hematologic -ultimate -##tillation -adam -##itish -hemodynamics -hit -hydrocarbons -confounders -approval -originated -osteoclast -cml -flash -phthal -cornea -exud -genotyped -contraind -covalently -symmetric -##mi -virolog -##54 -##oderma -##aric -assignment -triggering -immunoblotting -ensemb -moieties -orf -##inator -oncogene -cytomegal -##del -##ris -conception -medicaid -paralysis -parox -spirit -branching -organelles -orthopa -##enn -collapse -cholest -subop -serving -##ker -reticul -cil -sad -mineralization -phosphatidylinositol -biased -volumetric -manufactur -dbp -##infection -alternate -initiative -cultivars -crest -solving -preserving -pelvis -meningi -tensile -vinyl -accessibility -##ostr -##opril -transloc -indoor -skeleton -pacemaker -##bound -ws -globally -##olys -interne -figures -##iff -1984 -inhab -charges -1970 -whites -##ulture -zo -diseased -##ught -kingdom -rnase -144 -##oreceptors -internalization -raises -atropine -valv -##imensional -##urate -##roch -##ocryst -##rops -##52 -anaemia -microrna -##tide -consciousness -pacific -scf -spores -##omorphic -manufacturing -frog -cpr -##opause -thrombotic -postprandial -bbb -calp -acetonitrile -stenting -##cephalus -latest -nodular -coherent -tetracycline -hospitalizations -18f -chl -households -afp -affinities -deaf -cities -##iomy -vitreous -##ortal -oest -transmit -informative -tocopherol -chlorophyll -donation -likewise -infancy -potato -submuc -##oste -##oted -exclusive -taxonomic -seventeen -##optic -daytime -pcp -dal -supernatants -nucleoside -raising -capillaries -resten -explaining -irrigation -surgeries -##nps -vwf -agencies -increment -tang -conjugation -##neal -##olism -##azolium -##ilum -##ama -hemolytic -##uling -##uns -coat -transducer -osteoblasts -autoradi -vitr -secreting -##yles -triphosphate -##omotion -branched -##nals -##oplasm -comprise -begin -copolymer -##entanyl -infiltrating -vox -##oplasts -proteomics -sparse -diabetics -hinder -foam -cage -lutein -infras -sacrificed -##orporeal -cervix -acquire -tidal -distortion -trapping -vena -oligonucleotides -rotavirus -thinking -recommendation -radiology -nose -inexpensive -##atinib -package -mandible -measles -##imil -deeper -##birth -pir -##ochlor -progeny -triplet -career -vr -note -##eles -##hips -cryp -silent -##olith -contour -quercetin -##±1 -carbohydrates -##amid -pcb -dith -motone -specialty -##ophores -journals -hrv -##produc -##atelet -overd -##idi -##fulness -b6 -sciatic -exacerbation -insoluble -unm -outline -plantar -subcutaneously -lactobacillus -##ubs -pcl -postmortem -nev -mep -undersc -farms -##adium -disposition -monomers -pharmacists -##terenol -impacted -combines -##itoneally -cd40 -mediterranean -pix -neuroph -repeatedly -sheets -nlr -##ammon -cytomegalovirus -outlined -depolar -unin -crown -arteriovenous -##atil -hgf -##bles -rhythms -innervation -neurotrophic -eq -endocarditis -unsaturated -maldi -anorex -monocyt -phosphodies -humidity -##peritoneal -pand -##g1 -telomerase -##assays -##olk -cyp3a -glucocorticoids -proteinase -##organization -##athers -ovaries -##plicity -unfavorable -melting -bpd -came -seropositive -liv -##r2 -anp -benzo -##room -##acetate -##aceous -mhz -elegans -a549 -##requency -inverted -sarcoidosis -exclude -cd45 -ibs -ou -wb -##nic -##ogroup -prostheses -148 -dealing -cing -independence -osteosarcoma -primates -##hythmic -initiatives -##ectomies -periton -collective -amines -neurones -replicated -##ithromycin -undergoes -##uspid -overestim -instance -carboxylic -pak -flavonoids -audi -disturbed -pharmacodynamic -oblig -3a -snr -##imeter -distinction -holl -fibrils -khz -pneumococcal -catabol -searches -trigeminal -suboptimal -cathepsin -este -dopa -broadly -alloy -monophosphate -nifed -insomnia -texture -firm -nh2 -viii -penetrating -##enses -enk -mammography -cotton -gamb -kle -dutch -synchronous -litter -proteolysis -autonom -##l1 -parenchymal -competent -multin -salic -seques -3t3 -uncontrolled -determinations -kcl -##enders -steel -individualized -##inis -##oprost -stabilizing -endometrium -015 -exploited -##ogether -caring -fentanyl -plaus -nifedipine -tomographic -##oxication -##calc -timp -nsa -haemorrhage -##uminal -freshwater -##dynamic -altogether -gar -british -physiologically -alternatively -##64 -neutron -automatically -isoproterenol -malond -mps -accelerate -uniformly -##inoid -##roline -##orations -stretching -sensitiz -keto -implicit -trh -wing -##onder -keeping -compares -www -##izers -tow -tar -helped -discriminant -ry -arises -##85 -##erine -##quartile -controversy -phosphatidylcholine -discontinued -aba -cpt -intoxication -digit -cet -bifurc -baby -eat -coastal -ducts -audit -subarach -fk -##aul -notable -undifferentiated -##omide -transpos -nih -interquartile -explains -##trial -elusive -marital -bnp -disadvantages -troponin -parotid -somatosensory -bfgf -bod -ton -charts -mannose -suppresses -##etite -##riting -primed -attenuate -radiologists -protr -##pray -ecs -hept -touch -hydrogels -##inally -versatile -dispersal -superfamily -housing -intraperitoneally -leishmania -1d -multip -139 -bactericidal -leukaemia -##tebr -welf -flor -##ocele -tym -associate -robustness -redund -preserve -crops -cultivated -indole -posttraumatic -sew -peritonitis -bg -##amate -sport -##omatosis -extends -ventilatory -whit -virulent -characters -corrob -searching -fibro -linker -restenosis -##vern -fisher -discrepancies -esteem -midline -residential -bioinformatics -groundwater -supplementary -welfare -sit -globulin -qtl -##absorption -tibia -parenting -hemangi -declines -malondialdehyde -continuum -nationwide -flies -tablet -##71 -positioned -##ropyl -quarter -rheumatic -##axial -adjunct -ptx -reactivation -prolongation -lumin -freely -ppv -ocean -proposes -emulsion -cutting -nanocryst -##py -##urf -resili -implying -134 -vegetation -replicate -hyperthermia -iodide -hamsters -spanning -uninfected -flows -guanine -pyridine -inpatients -instrumentation -##uliar -##tt -##lated -oestr -jump -tips -altitude -topological -peculiar -prerequis -cag -toll -##ket -incen -nhl -kj -moral -menopausal -cgrp -esrd -dysphagia -amd -ataxia -oesophageal -##iliated -myofib -choroidal -alf -##ypsin -##actone -psi -posture -fluorouracil -impuls -erosion -durations -remn -##itch -infective -notic -##rect -permitted -offering -classify -psychiatry -##ticals -offset -stap -displaying -reality -methylated -##ondii -swedish -friends -methicillin -tremor -tracts -##oto -##ographs -references -plc -psychopathology -earliest -99m -delivering -##otri -documents -deleted -##cope -dichro -narc -##cycl -##best -231 -elevations -##icl -religious -microal -bap -##ishes -carc -pouch -cellul -subarachnoid -##oalveolar -emptying -diaphragm -disin -dots -##dc -012 -gaussian -prescriptions -s2 -##apatite -osse -##oter -poul -relapsed -histocompatibility -mib -occasions -atrium -dbs -cytoskeletal -diploid -inspiratory -buccal -##oglycans -1982 -inguinal -eyel -fg -continuity -##itivities -##aicin -##cl2 -precondition -destro -monocytogenes -##atrol -homogenates -leiomy -beef -string -readmission -prostatectomy -neurotoxicity -displaced -maca -odont -escal -photosens -naphth -osteoblast -adduct -##amins -afm -bulb -sentin -acceptability -lifes -ampk -grand -99mt -##osm -calculating -strictly -nrf2 -parv -tilt -pax -telomere -cavern -electromagnetic -multifactor -update -iris -##acute -carnitine -##using -ocd -cava -dehydration -anomalous -recoveries -##inous -smears -denti -discharges -epr -obstetr -orthopaedic -uncon -##ications -##iop -##ofrequency -uncomplicated -bpa -everyday -##ned -pores -morphogenesis -superc -##unctional -gastrin -cd25 -##veratrol -glioblastoma -reconstituted -pyrene -leave -opportunistic -adopt -repetition -menopause -thromboembolism -cirrhotic -rflp -##idia -grap -microcirc -carcinogenic -pun -136 -168 -##night -microin -threonine -##opoiesis -##orrhea -capital -stably -nhs -##itan -##istine -incorrect -endocytosis -bioc -checklist -directional -music -##esus -##therapy -##electron -tele -puberty -##xine -##imentin -pter -myo -cytological -##elastic -salinity -tal -paste -##eterm -##ford -##aginal -capsaicin -enterobacter -civ -cdk -rifamp -horse -pathogenicity -tears -preinc -##son -zyg -nid -##97 -alternating -##ulse -turkey -cave -kc -intermolecular -monomeric -proteome -rnai -apex -client -spacer -scl -duodenum -microfluidic -vimentin -hox -##ipar -##anyl -radiofrequency -zym -legs -cds -feelings -rectum -scattered -nsaids -electrospray -attracted -##icted -symptomatology -opac -squares -tert -refined -dressing -substitute -extracorporeal -metac -##hedral -operate -dipole -##romic -solve -ureteral -neighboring -norms -hdac -accumulating -anaesthetic -##oreact -1983 -recycling -maximize -papillomavirus -cd14 -5h -hen -##trials -unexpectedly -beats -retest -pesticide -seq -diarrhoea -clarified -resveratrol -##orphin -enrollment -outgrowth -knees -##forward -ny -##idden -##anch -concentrate -orange -wi -catecholamine -tagged -inferred -rim -adversely -##aded -fec -spectrometer -##izz -lod -##aryl -nanostructures -supers -acclim -teg -##acterium -descriptions -neurophys -##uistic -presch -justif -thigh -##uates -agarose -triang -hus -explanations -stated -ors -##rophin -repertoire -aven -erythroid -choose -recognizing -##udin -resis -aggrav -##othorax -mechanically -playing -gases -##opharm -congr -vagal -##zer -3rd -mms -sert -scot -stereot -##iring -##holders -tent -2nd -##bf -lactose -chitin -##estib -chromium -concurrently -neuropeptide -fundus -antiplatelet -epa -hamper -immunogenicity -##enone -outd -cpb -decarboxyl -intral -subclass -adaptations -##cor -clinicaltrials -signatures -iib -##azolam -desire -##orylation -quantifying -##leting -delir -niche -platforms -medically -oncogenic -ritu -##fen -viewing -##oxon -ileal -ninet -##erents -suitability -noticed -153 -belief -##dv -spastic -harmonic -faced -insignificant -probabilities -broil -nineteen -aps -##omegaly -##ei -maturity -##uber -permeation -gsk -scanner -twofold -175 -arranged -sickle -##imid -##ju -sorb -mirror -tubule -overs -alginate -nicotinic -gbm -neurodevelop -erbb -nv -hyperth -engr -prime -biotechn -stern -interl -linearity -##ectile -##isal -vegetable -hrt -##entic -##61 -molars -##atable -beams -lister -absorp -ears -##cnac -purch -sla -900 -clade -globin -fairly -convergence -sophistic -disclosed -longev -##oxib -freshly -99mtc -penile -aesthetic -rhesus -mari -nosoc -supplied -##esi -##orum -rituximab -emphasizes -clostridium -unexplained -cofactor -terres -zeta -defibrill -registr -conference -143 -friendly -starvation -carcass -bundles -gpi -consti -aaa -visually -mur -perturbations -belongs -##ago -##know -anaphyl -pict -##md -rhabd -##hp -passed -minimizing -parap -annually -silico -transfusions -sentinel -abr -trifluor -emergent -mcs -aiming -dmso -ultrason -##iax -##ham -latex -##uish -lich -calcitonin -abstinence -coal -locomotion -tetram -han -pcv -cavities -actu -##athyroidism -188 -dentate -sensitivities -dissip -mimicking -spear -blunt -##gin -##eas -##yll -preschool -dendrites -##vef -hrp -semin -xrd -endo -ltp -pericardial -lox -orthopedic -specificities -episodic -gcs -eph -##ault -lacks -uniqu -longevity -bridging -dfs -grains -frames -probing -clonidine -consolidation -polysaccharides -##ocarb -deuter -##epr -chlamydia -142 -##oluminescence -seaw -regimes -##erver -r1 -guar -lambs -figure -choles -chimer -l2 -transitional -burns -##cranial -alpha1 -nosocomial -vii -##ocarcinoma -stabilize -galactosidase -edi -##itt -pull -screws -easier -##orial -##timulation -##acylglycerol -dropped -untrans -makers -gondii -##bu -nanofib -dichroism -##uronic -intolerance -nematode -##iatr -##cil -senior -diazepam -ideas -ensuring -slc -decid -primitive -##icked -constrained -ups -prolapse -neutralization -resilience -provinc -fluxes -unless -addi -stakeholders -##entified -orthog -haloper -emerge -germline -theophylline -vldl -vasoconstriction -recre -org -crystallization -paternal -predisposition -luteal -##rofen -isra -yolk -primate -##ogaster -anneal -quartile -hsct -##agglutinin -##atoxin -##house -optimizing -ophthalmic -bradykinin -seasons -literacy -forebrain -dispens -pam -exceed -##ulous -absorbance -lists -##ervated -coff -adenocarcinomas -multifactorial -coronal -disrupt -programming -##trate -016 -inference -hydrocarbon -wc -##tebral -facilitation -peers -shoot -extrinsic -stressful -microbes -holds -prevail -genomics -165 -carbonate -navigation -parity -morphometric -##rp -entering -haloperidol -##roportion -feces -##erating -lacked -haemodynamic -equine -residency -loos -##isson -##formed -deprived -prednisolone -goats -##terone -gangli -filtering -game -gliomas -##otocin -defer -##duction -partitioning -psycho -##atally -schizophrenic -emphys -esr -adiposity -shortly -141 -013 -##etts -##ocic -##iley -evaluates -feet -bacteremia -photosynthesis -##lp -diurnal -collision -hsc -hypothetic -immunomodulatory -##idth -sphere -glutamic -occult -rop -pedicle -dap -barr -chemoradi -prednisone -ulnar -acknow -unequ -##abain -##ipa -##pine -glycosylated -obsc -spiritual -dying -gdm -gabaergic -predisposing -regards -subscales -myx -meals -delirium -arf -gaz -autonomy -##acetyl -occupied -suv -gathered -xenograft -gln -splitting -jug -convert -##ospital -bph -acetylation -##uent -lies -##ck -chloroplast -##ocol -##02 -cock -isoelectric -substituents -##con -iia -cingulate -facile -boost -psychotherapy -iat -##olipids -ouabain -##yrib -handed -##aliana -ciliary -bilaterally -##tains -nanoscale -oligosaccharides -punc -fossa -thaliana -subcortical -##observer -convenience -olds -eug -strips -scn -aneu -therapists -ascer -catch -hips -014 -eosinophilic -facing -neurof -sitting -pfc -##width -anorexia -lobes -firstly -considers -bisphosph -thalamic -##cholinesterase -##ken -poultry -##uccess -##riage -vitamins -laws -adjustments -beta2 -repro -pauc -linole -biocompatibility -prompted -files -sounds -ecology -##iaceae -##oxal -carries -mbp -excreted -mining -sister -rapamycin -360 -320 -varieties -vesicular -trimethyl -##ersed -##ipramine -149 -undergraduate -##v1 -n1 -glucan -retur -biochemistry -##agl -wavelengths -##quilibrium -consequent -##ropr -##uccessful -harvesting -nonr -intercal -lot -gastrectomy -honey -letter -##usted -158 -011 -shifting -foxp3 -notice -##wt -wine -hfd -solved -phone -versions -dynamical -taurine -syph -disproportion -corpor -neurogenesis -seb -##×10 -swallowing -peaked -win -presently -unsuccessful -##ifies -olive -piglets -trophic -thrombolysis -vulgar -ug -1981 -divertic -##efined -unpr -##usions -018 -##ognitive -##atized -##si -ribonucle -##amides -rom -worked -appearing -##amphetamine -interdisciplinary -diagr -pten -writing -benchmark -savings -degrading -b3 -solute -dosages -psychology -nasopharyngeal -mutagenic -supram -equil -oligomers -ncs -constipation -##vitamin -##lycaemia -##1r -##odermal -resembling -fim -##oproliferative -aneuploid -places -th17 -230 -brachy -##param -turned -concomitantly -algae -vulgaris -saving -predominance -homozyg -dere -gonadal -##gran -exempl -##obia -karyotype -intrahepatic -normative -encapsulation -##illing -moved -##icide -293 -granulosa -##157 -chemotactic -comment -mycoplas -switched -doping -##hydrop -stric -cruz -pitch -doing -1r -biases -crs -hematocrit -ingredients -constituent -##eld -metam -imposs -thousand -subpopulation -orch -##anous -##otoxins -topology -##cens -mexican -comprehension -chym -##istinguish -##imal -ihc -stems -crossing -##zo -bioassay -commission -2c -colocal -##inol -##ronate -poisson -belonged -1c -pineal -aided -##iling -streptoz -standardization -fi -octa -##ki -histi -##istering -advancement -##ried -##e1 -gastritis -oedema -##udine -mpt -subl -plasmon -explants -eus -##icate -transd -tinn -immunocytochemistry -jaund -crack -gate -speaking -erectile -transcranial -readers -ancient -##wich -sulfide -voltamm -incon -preced -ink -interrup -precl -acetone -nociceptive -appendic -##iana -encounter -##imotor -alcohols -##ano -alert -kt -decarboxylase -hydrocephalus -calorimetry -##exual -ticks -spearman -melanogaster -patterning -recognised -hollow -glandular -##ara -couple -ch3 -combat -father -##plicating -147 -toluene -carotene -280 -b12 -interruption -supplemental -lasted -anastomotic -extensor -assume -chicks -grip -jur -imposed -liquids -messages -terrestrial -pm2 -tinnitus -##okes -statistic -aldehyde -antagonism -quies -latencies -reoperation -inward -##eratin -grey -vh -detoxification -clav -drawing -sq -##unted -hepatectomy -foster -possessing -anap -message -##istinguishable -statement -##aminidase -annexin -sepha -ingested -##nac -incidences -bronchi -##tii -##atism -vmax -ruptured -polyuns -ctx -inbred -##87 -##ario -cca -warming -reconstructions -##ocytoma -autonomous -retroperitoneal -phagocytic -μmol -catalyzes -manipulated -subspec -estrogens -sepharose -ali -erythromycin -##ng -smallest -wit -mitosis -topography -017 -bevac -sophisticated -afferents -lactamase -eic -homa -forsk -gravity -uncou -gins -h4 -prokary -solids -restraint -tried -bevacizumab -##kes -tms -##itoneum -esc -holding -##ulo -choosing -poses -ebp -assimil -arabin -invertebr -##onergic -gbs -terminated -gemc -152 -radiolabeled -##aca -crash -hyperal -grid -comparatively -bronchoalveolar -msm -helping -nephrot -histochemical -##enicol -154 -setup -rash -gamm -##oresist -##abr -pandemic -mev -cred -secretions -instructions -beet -mpo -steatosis -tsp -enkephal -##opid -arbitrary -cps -eae -##imetic -attendance -lad -panic -nucleation -japonic -atcc -erythropoietin -uroth -indistinguishable -noneth -looking -thiaz -intent -matern -epithelia -##hem -ptc -##entral -intras -transportation -ecd -gating -capsules -##2o3 -##rov -##po -nonetheless -##globulin -ampicillin -coffee -favored -debrid -teen -transparent -##96 -qrs -aggregated -fractionated -distraction -basolateral -lvef -intracerebral -hypop -plausible -##udi -catecholamines -hetero -immunocomprom -polyunsaturated -gemcitabine -planes -autoimmunity -multidimensional -cyclodex -valence -localize -cores -interested -desensitization -##ainees -diluted -hydroxyapatite -010 -infrastructure -asbest -sensorimotor -contig -speciation -destabil -##i2 -debr -paw -quit -chief -das -driver -lever -sandwich -strands -lignin -thymocytes -afforded -tacrolimus -neuropsychiatric -multimodal -swiss -##aris -mimics -##com -##vertebral -##icile -fibrotic -deliveries -destructive -standardised -prothrombin -lymphoblastic -##ogrel -##acers -dyslipidemia -narrative -##ropol -##gus -##force -cited -##ammonium -dvt -projected -##ophthal -adr -##aec -##arate -shrink -##tisone -px -##othiaz -flowering -##endicular -hyperinsulin -caudate -cotrans -cbt -ltd -bmt -multifunctional -forskolin -utero -eif -beat -pss -chew -serotonergic -equipped -chemotaxis -antinocic -cytometric -neurosci -biosensor -antagonistic -##ublic -##olinium -##imab -##osphere -inducer -barley -cholera -steep -vigorous -mesop -klebs -neurogenic -advancing -bari -myof -##itinib -constituted -folic -##here -##oge -overnight -##opent -arthroscopic -cocc -atri -inspection -##ovol -schedules -aed -hypothetical -cleaning -synchronization -fails -capsular -lex -uveitis -cope -ejac -fathers -##94 -coatings -neglect -perpendicular -personalized -intrathecal -telem -##acted -hydrolys -polych -156 -alkaloids -flo -nonm -vegetative -lifespan -hydrodynamic -embryogenesis -modulator -##enedi -amyloidosis -##itates -prerequisite -interconn -monos -silk -##tention -vasodilation -prion -##elioma -h7 -##astin -wbc -erp -nigra -collections -dentists -buffered -multifocal -192 -etiological -modulators -##imburs -devoid -upr -ewes -##acia -bupivacaine -eukaryotes -roughness -otitis -##elity -##iximab -haart -pcbs -##edicine -aun -costly -##emoral -covers -##ocyanate -eliminating -thyroxine -operon -##oxazole -hepatoma -v1 -shedding -24h -electrocardiogram -book -neighbour -republic -friction -wol -##oliosis -advantageous -occupation -##enium -vicinity -microglial -270 -tu -digestib -##oventricular -blacks -hydrophob -pyrrol -##5a -##olor -predictable -##odom -echin -##oves -repeatability -oat -##estin -kpa -##ij -reimburs -##ecu -recon -semiconductor -##vert -flora -intrig -ferric -coma -##oxins -discs -tunel -celiac -##inescent -maneu -impossible -##inone -streptozotocin -##ulinum -tunn -secrete -going -pictures -analyte -##angu -##ardiac -cytokeratin -twist -mutational -dissociated -worker -sic -folds -trapped -apob -surrounded -##ael -alleviate -phosphodiesterase -inspired -straw -##adine -layered -bz -meiotic -c3h -nh4 -neovascularization -gfap -wavegu -campyl -ets -resections -pcna -till -damp -visualize -licens -##agas -repaired -immersion -##uzumab -##oplasma -pointed -t2d -##onomically -##entful -##apeptide -counselling -advis -reconstructive -restrictions -dwelling -originate -##eresis -mist -##aenoic -##fraction -syphilis -allografts -syncy -sts -imt -tuned -chambers -uneventful -store -herbiv -hens -evolve -##etent -palp -homo -klebsiella -adjunctive -immunocompromised -difficile -sativ -##algia -##olia -edges -nationally -coum -tricuspid -modular -sequentially -redistribution -ovid -epstein -micropartic -##trypt -##osteric -neb -sigmoid -unidentified -labile -protons -receptive -price -##azepine -msec -huge -env -drg -workload -antiproliferative -augment -reconstitution -pufa -instant -##1a1 -pertin -##eit -tensor -##class -pharyngeal -cec -trainees -stic -replacing -surfactants -##mer -cts -formaldehyde -restorations -##phenol -unpreced -nanor -oblique -isotherm -##idis -##drug -mosquitoes -exacerbations -clopid -scid -contrad -voxel -ehr -2r -ua -ec50 -hk -cnt -saw -stz -apoa -manus -nep -harvest -guiding -filamentous -unprecedented -outper -ascertained -purkin -udp -elucidation -sixth -aper -##ineral -amphoter -clopidogrel -supervision -##opolym -frameworks -synthes -activators -analyzer -intraepithelial -##bt -purkinje -qds -microbiome -biophysical -elsew -pim -chrys -gle -campylobacter -compressive -elsewhere -##wa -##ethanolamine -purine -ending -companies -productive -interneurons -effectors -integrative -pbmcs -preex -immunohistochemically -##ira -##ilton -##emide -frailty -encouraged -anticonv -osmol -warning -dissected -straightforward -peroxisome -##oct -##astically -filtered -glomerulonephritis -##adone -thyrot -elastase -##nem -cxcr4 -##oscope -isoflurane -underestimated -hunting -##rogl -cruzi -ribosome -occlusal -##iabetic -coenzyme -feeling -succinate -torsion -subacute -legislation -##oneg -strategic -uvb -##itants -atlantic -spong -ventilated -mug -infrequent -leukotri -##issive -infan -mesang -eta -vehicles -a3 -##oprote -##level -hydroxylation -orthogonal -jaundice -thy -biodiversity -151 -listed -bariatric -bos -##ycle -mif -##ylyl -gib -conjunctival -wheel -ulceration -depress -260 -##agues -nonsm -shut -trachea -pharmacology -oligomer -pole -##had -##91 -##iva -luminescence -ensemble -conting -mao -moments -imaged -resemble -danish -hbe -adhesions -spirom -hydroxide -rick -resum -deph -opinions -collagenase -plga -dss -intriguing -##562 -##amphenicol -##rical -flexor -neurodegeneration -##graphic -synt -##loxacin -manuscrip -pneumon -hamilton -trimeth -gata -##etaxel -confusion -emphysema -ao -##tizing -microvess -##pg -##tilled -confirmatory -dlb -##azepam -##epile -ascorbate -erythema -trout -penicill -iol -##osecond -h1n1 -flood -##uresis -arrival -tunable -reorganization -##tl -docetaxel -porphyrin -wedge -norway -cyp2c -cohes -consecu -appetite -safer -iterative -pover -computation -occluded -##emp -abiotic -hyperpar -bioreact -asn -polyd -##opsin -##ason -serous -immunoglobulins -ye -##tructured -poverty -ontario -gum -##plo -ara -μl -opens -jugular -##bing -157 -tha -diode -ccs -genotoxic -immunogenic -brca1 -methadone -mant -scatter -fvi -ionizing -smc -ventilator -dx -mannitol -microarrays -##emias -ku -exchang -devast -rostral -pher -benzodiazepine -bread -##bg -compensate -falling -cuc -mcl -mm2 -##asth -emitting -willingness -ovx -qrt -consecutively -abrupt -metabolized -l3 -sickness -iiia -oxalate -unre -igg1 -unfolding -ellip -transformations -atrioventricular -ceramide -ended -ferment -ards -substantia -##his -reasoning -ribose -mgl -photocatalytic -exit -conferred -##ko -albeit -glycolysis -lbp -##thrombotic -##arith -scoliosis -750 -##itely -reaches -rbcs -pigmented -hypn -myrist -##yridine -fidelity -oroph -##obiotic -##atheter -tka -inotropic -tracing -mastectomy -##bens -utilizes -anhydr -lie -acted -coming -approached -ori -worth -slides -##juana -granulomatous -##atility -guarant -reinforced -harboring -reticular -married -reflectance -granulocytes -paralleled -apl -##rosy -vasoactive -pertinent -##b3 -eluted -corros -##uce -##faction -macroc -shig -retarded -motivated -macromolecules -philos -digested -1500 -tactile -##olymph -paroxys -export -energetic -##otherm -defence -##obutyric -crim -caesarean -portable -biogenesis -install -177 -unr -antagonized -editing -fes -phenyle -weather -richness -uniquely -##ellate -heroin -healed -##urition -metron -ancestral -sutures -##hc -##glucose -bear -1979 -isotopic -thromboembolic -spikes -hemolysis -neglected -cz -##aturation -verification -ascribed -infiltrate -phle -pill -##airs -coordinates -##omogene -pluripotent -hidden -bromo -colleagues -overlo -accumbens -mct -destroy -##df -acetabular -denaturation -migratory -agriculture -phenylephrine -revolution -workshop -restrictive -investment -denervation -glucopyran -acetylcholinesterase -##idus -midazolam -centrom -polymorphonuclear -pag -incidental -sln -appoint -begins -lytic -##ifers -abrogated -temporally -advent -gauge -chloramphenicol -constriction -authentic -chemilum -conflu -retinol -convergent -rct -spore -intract -hypercholesterolemia -inclusions -aav -cct -infest -implication -dol -ovariectomized -neurite -cta -sct -##ankton -begun -prostaglandins -damaging -##olated -helices -tends -##odeoxy -debridement -cyclohex -vibrio -elasticity -motions -pairing -epiph -tender -etop -blindness -acne -35s -arsen -topois -oxides -##estock -tann -debris -narrowing -##well -outlines -doubling -spiked -mitogenic -pherom -microstructure -thromboxane -livestock -appendicitis -p27 -##onad -deformities -logarith -##icine -relates -harmon -empty -##life -asbestos -supramolecular -##achol -coexistence -##oem -disaster -cd2 -enteral -pea -labels -notes -mmps -wky -hallmark -##odia -oscillatory -census -ordering -##rna -glycosamin -##oviruses -supra -##icially -lessons -irs -##ozapine -sonographic -suck -##aminophen -disparity -caco -ais -graz -biol -answered -analgesics -pie -##odeox -##ubated -174 -no2 -etoposide -breeds -detr -yag -anticonvuls -anisotropic -averaging -valvular -liposome -##att -enthal -anger -euthan -consultations -marijuana -tonsill -anthrac -cruciate -washout -lysosomes -toxicological -glcnac -infectivity -gaze -declining -auxin -necrotizing -wiley -amphotericin -syring -##align -paradig -business -stemi -discer -disciplines -159 -bradyc -coexp -nonf -nanowires -compulsive -intravitre -interven -lec -##thi -awake -contrasting -205 -technically -implantable -paradigms -halluc -virgin -oscillation -o157 -##tian -hev -eru -intranasal -##yne -chir -computing -binge -checked -fx -##april -explicitly -visu -radionuclide -##noea -chorionic -dies -drops -leprosy -hazardous -thalassemia -mics -bite -roughly -controll -##arring -hung -bit -orientations -hek -##inesia -serocon -transposition -##oselectivity -consor -sialic -k562 -epig -##olus -##ovsk -##pic -explos -##ulph -##apopt -antidi -lining -compatibility -##b4 -comes -enorm -019 -spss -remodelling -herpesvirus -scs -##aked -xenografts -multilevel -missense -cdnas -pc12 -untranslated -remainder -mitigate -operatively -slit -jet -aromatase -##opeptidase -abscesses -renewal -bomb -##yelination -metropol -fluctuation -catabolism -chondro -porosity -rods -##uoden -insem -spatiotemporal -mismat -b7 -thousands -##dehydes -##±2 -##igibility -intractable -methyltransferase -esterase -interpreting -##sulf -##through -influenzae -opiate -fe3 -templates -preconditioning -srs -plug -thp -repressor -neurodevelopmental -hns -ipv -mek -##uprofen -##odipine -thail -subscale -##tegration -ampa -fingers -spermatogenesis -opened -culturally -devastating -##arone -fulfilled -devised -governing -epist -drb1 -##ocks -drift -caloric -photographs -thailand -##76 -obliter -antipsychotics -whitney -paucity -gmp -##isters -##retin -kinematics -##uridine -sibling -invariant -##qi -craniofacial -##oxan -pct -catast -innovation -biodegradable -##orphine -ameliorated -rew -##nish -midbrain -##optera -hms -sewage -pra -stereotactic -##arby -occasional -carbachol -confers -clozapine -rendered -pms -amphiph -##minth -naphthal -injecting -##ivocal -unal -dermis -##iens -relapses -##light -182 -assemblies -basin -##yb -pathogenetic -row -ud -nephritis -##gesia -##heal -atresia -linguistic -ordinary -semiqu -no3 -acetaminophen -photoreceptor -divalent -hyperparathyroidism -alcoholism -reproduce -stratum -185 -linoleic -##aned -##othane -chloroform -reti -ureth -##3b -##ocyanin -genotypic -trisomy -think -adrenaline -dams -initio -##enib -endors -disclosure -compensated -rabies -##clusive -reu -ams -mesangial -shot -##orus -##oplasia -lu -##arrhythmic -##74 -pharmacotherapy -crosslink -cylindrical -elongated -boron -##erals -pressor -tregs -##tail -deployment -warrant -ibuprofen -creates -bare -owners -##osulf -serov -eyelid -electrically -##aventricular -alkali -transfers -packaging -strikingly -hydrolase -delineate -##net -scanned -annotation -nip -##rophosph -philosoph -masked -captopril -botulinum -##entions -##afluor -nach -pq -supposed -tcp -solubilized -bird -cephalospor -authorities -organisation -cyanide -distorted -quater -mount -fvc -memories -washed -fad -fistulas -thoroughly -nanomaterials -inflamed -resembles -goat -##phase -relapsing -assurance -ria -antiepile -brachytherapy -flower -societies -unaltered -maxill -cms -heads -eligibility -insert -ventricul -acknowled -discriminating -council -enantiomers -abstr -mj -invited -halothane -argued -##utation -preventable -polyst -superimp -bridges -robot -p65 -pda -reader -##2c -myopia -##orphic -225 -popl -dipl -capsid -##kal -sertoli -egfp -cytologic -##mu -crh -##istan -contraception -crossed -randomization -##phos -micronucle -##obium -##ilin -junctional -##lofen -162 -impairs -dimeric -vor -aggress -##93 -doll -bla -gaining -##sd -##atre -ultrastructure -xi -judgments -nitroph -factorial -ral -minus -enkephalin -##roma -##ronch -annealing -##eller -splenectomy -xl -178 -##onge -##recogn -transduced -checkpoint -priorities -empower -##olase -leishmaniasis -triton -positional -##oris -arrested -listeria -gadolinium -adipocyte -##kinase -crystallographic -##oned -neurole -eruption -prur -breaking -lethality -creb -##omeres -##peridone -shrinkage -##elic -mmc -microf -##plas -puer -anatom -lipophilic -hemostasis -4th -nonun -digoxin -##thermal -tether -##eastern -rhabdomy -siv -eluting -##osomiasis -dtpa -seawater -##bur -nigeria -mediation -##ogluc -fluc -introduces -immortal -##f4 -neurocognitive -acidification -hydrophobicity -streptomyces -bioactivity -##orth -inhomogene -sarcomas -paramagnetic -spacing -immunocytochemical -comprehens -drinkers -##oinos -pyrimidine -astrocyt -##ospasm -hampered -photonic -saph -you -atg -jejunum -photodynamic -statements -ventricles -occlusive -homologue -ornith -specification -chord -listen -residing -districts -simulating -32p -##trated -dialys -##iculus -him -lun -worms -subsp -ternary -denitr -cd13 -lati -tetanus -bandwidth -prodrug -organis -educators -situated -melanomas -genis -symmetrical -##onates -preparing -fna -pulsatile -bifurcation -ranked -##legia -objectively -cubic -oxygenase -refinement -##akary -cream -degrade -territory -##ingly -execution -syncope -zoon -hairpin -intima -g6 -##ispens -hyaluron -historically -1978 -diastere -adequacy -numerically -soleus -anxi -##mitters -##roscopically -vdr -##olars -paroxysmal -shares -proceeds -chloroquine -environmentally -oesophag -##ometrically -lying -etiologies -doubled -164 -bilayers -cpap -parp -drastically -reactiv -fluoxetine -##lasia -##ini -groove -allosteric -lentiv -l5 -nct0 -etiologic -dysm -permeable -##nc -polycystic -##iant -##ineural -heated -banding -abnorm -melanin -osteoclasts -ffa -trin -neurotransmission -cadaveric -holes -backgrounds -farmers -calibrated -strengthening -push -dilem -ems -lamellar -radiosens -##ami -impulse -166 -##idene -reproduced -whee -##yryl -195 -tio -monoph -##oplatin -reared -oestrogen -superiority -nondiabetic -coarse -immigrants -twitch -placing -evaporation -manipulations -##entanil -##ocutaneous -hydroper -streptococci -flowers -##ocyclic -##eprazole -burnout -indispens -evening -##uro -##aterial -##yrid -repressed -cmr -sett -bean -evac -additives -reviewing -outdoor -##methyl -reflexes -aminoglyc -quadru -mycoplasma -cloud -connecting -choroid -paracrine -##onation -canals -scap -pedig -answers -iatrogenic -##ocent -michael -substanti -shallow -engraft -probabil -autoreg -ferm -shaping -fis -##idinium -imrt -participates -hundreds -adri -sga -infertile -infantile -cycloh -spheres -concan -monoxide -5th -herd -endonuclease -doi -##lysis -amenable -valpro -amphib -0005 -glomeruli -264 -mimicked -symp -synapse -pest -##aturity -multist -##apoptotic -nmd -trough -cftr -##olab -##allic -reviewers -probabilistic -expectation -visited -uncertainties -heterozygosity -muller -##asin -anthropogenic -e3 -##phi -plei -##ropin -##kb -sp1 -completing -dsc -elementary -hydrolyzed -dephosph -counterpart -mortem -ppd -167 -##orphyrin -##atine -colch -judgment -transgen -tnfalpha -cron -po2 -##ofuran -1975 -pyrophosph -titres -androst -equivalents -metalloproteinases -dimeth -clotting -myofibrob -migrate -ideation -aza -##fection -##berry -nz -interrupted -pbl -cnv -spa -cic -proposal -confron -polystyrene -phosphoryl -##timulated -dinit -##omatis -incorporates -interests -csp -clinicopathologic -scene -steric -##lys -imperative -nanoc -failing -committed -exciting -lpa -oleic -mediastin -workforce -dwi -submucosal -##yron -##rals -axes -secure -hed -##alis -##onegative -concre -##atergic -seeded -gallst -##anat -##yseal -igg4 -phylogeny -mounted -obstacles -tendons -maintains -rigidity -simvastatin -##azosin -said -carbap -dividing -descriptors -##book -suppl -stacking -g3 -witness -geometries -##187 -##opreval -sponge -##86 -pleth -##ja -marginally -asparag -justified -diphosphate -synchronized -##anide -lond -ameliorate -eclam -syngene -smoked -##ishment -mts -trophoblast -##urethral -monoamine -conductive -seropreval -abnormally -mobil -dish -##ximide -methylp -ch2 -hook -metropolitan -cyclodextrin -ecmo -song -reprogramm -##oretin -worst -genistein -wards -kar -handle -hypoplasia -acros -##icides -marketing -granuloma -mycobacterial -##ima -163 -unmet -arose -herds -raf -reacting -motivational -##omatic -strengthen -##hu -##arenal -##adec -barb -restoring -occupancy -blastocyst -##avalin -quadric -patellar -anchored -tcm -##vian -glutamyl -lect -traction -inacc -climatic -##ocystis -##azo -##transferases -##lab -##inic -##ympath -subdivid -speeds -sterol -bleomycin -o3 -oligo -sevo -biomaterials -deals -i2 -##aulic -drawback -6j -pharmaceuticals -scavenger -megakary -thematic -bib -disag -chlorine -acinar -fove -sinusoidal -syngeneic -##orem -##p4 -gerd -neurotoxic -readings -##entistry -##ximal -swabs -phenotyp -172 -##x10 -h⋯o -eradic -169 -##acea -feat -premenopausal -cd16 -violent -disabled -abstracts -introns -topoisomerase -survivin -unrespons -acetyltransferase -chaperone -carboplatin -forests -minerals -correspondence -retroviral -dispersive -osteomyelitis -##oly -hhv -glycosyl -unpred -rhamn -empirically -subtraction -hong -nyst -bioch -stricture -adrenoceptors -accred -##esp -synergistically -larynx -disposal -thc -vacuoles -vasodilator -mildly -nest -quadrup -catalyze -addic -ei -glutamatergic -proteoglycan -intimal -demanding -##imilar -perfluor -myeloperoxidase -intravitreal -eclampsia -bead -5d -concanavalin -retrovirus -##ospecific -##oxylin -viet -deacet -trachomatis -specially -dangerous -imatinib -mth -acp -##arians -physics -icr -cyclosporin -##terolateral -##inus -lym -##burg -androgens -dithi -neurotransmitters -212 -resembled -monoc -ortholog -##sr -gpa -manually -infect -adds -dentistry -oryz -##uitr -perivascular -reuptake -lem -##tryptamine -covariance -xanthine -##otonic -##inia -ken -adver -##amers -butyrate -steroidal -nanocrystals -##onasal -photoelectron -rectus -corrosion -cardiology -rhythmic -methacrylate -##l2 -##ocene -epicardial -glycans -##ovalent -chemopre -##cys -limbic -decided -consul -peptic -##anthine -instillation -##oflav -nis -suspicious -##uctal -176 -brdu -elucidating -cavernous -constraint -civil -161 -nephrotic -wearing -brack -##thesis -intercourse -orthotopic -blo -fragile -niger -pgf2 -quiescent -msp -lpl -contradict -perceive -lasers -funded -lactating -appraisal -##athic -supervised -cryo -uti -pertaining -pcd -precipitated -dab -birthweight -evaluable -counted -##anthin -copolymers -pph -preincubation -scarring -referrals -registries -##agger -integrins -retraction -alpha2 -bedside -motile -breakthrough -##operiod -##ymic -ionophore -##kd -##ocalcin -chemoat -glia -polycyclic -yeasts -imagery -pean -dissatis -homeostatic -##uses -thoracotomy -bradycardia -xenobiotic -##imine -bivariate -antiepileptic -informal -london -pubertal -deemed -eia -saphen -barium -underp -##c3 -tachy -bun -##400 -analysing -amy -dict -indispensable -024 -longest -##nv -fertile -preh -snail -triti -pei -microcirculation -intracl -sevoflurane -##ocap -028 -2d3 -moss -reserved -extrusion -metronidazole -folded -bcs -engaging -ranking -colchicine -irritation -dereg -hardness -biodegradation -##iteal -preexisting -bon -urothelial -cancerous -guanosine -jejunal -heparan -albino -##cus -186 -##atars -##ava -##etron -adl -183 -##ifera -hsp90 -contributor -256 -morphogenetic -##orectal -chondroitin -sv40 -##odys -cddp -cytosine -##urred -##thra -arrangements -nematodes -##endoth -artifact -flour -asph -brass -practically -##lofenac -electrocardiographic -resonant -uf -appreciable -##ogue -amoxic -intentions -reimbursement -motoneurons -weighing -lacz -paran -portug -prison -denmark -##orrhiz -rotating -lichen -1960 -##ieu -##82 -germinal -rescued -mtb -intimate -microparticles -complaint -substituent -mont -vpa -wax -##odynam -serologic -atra -lysates -refers -doubt -k1 -intraventricular -##idate -##ip1 -cryopreservation -mucinous -gambling -enormous -graduate -antiarrhythmic -gastroc -autoradiography -##t2 -macrol -##otin -dysk -seroprevalence -##ithiasis -##ancing -suction -atrophic -propionate -021 -alex -sequestration -moll -vb -hyperalgesia -neurosurgical -subfamily -prism -unrecogn -interior -secondly -fitc -alarm -181 -023 -acanth -carn -pyel -gpcr -anchor -##otracheal -insertions -engraftment -examin -infiltrates -reconstruct -mucous -autocrine -lncrnas -burd -bovis -insult -triage -ppb -##tc -metaplasia -slowing -photoperiod -biop -pairwise -sof -222 -##iters -##ado -unsu -kain -rankl -tnm -203 -leach -morphologies -outward -superconduc -municipal -ugt -##μm -pis -try -##olded -pectin -pathologists -arterioles -fmd -carcinogen -##ugg -mutans -sleepiness -enfor -commonest -gluten -carpal -027 -hyperlipidemia -gpr -deer -antim -dhea -macromolecular -industries -unamb -tcdd -pixel -##articular -mole -dsa -interobserver -inoculum -fmol -tlr2 -oestradiol -##89 -xr -##ytoin -p2x -inconclusive -##apsed -persists -##romatic -overlooked -4d -##abdominal -traps -mm3 -formulas -204 -centrally -depths -##idation -pla2 -israel -##oplankton -cortices -lor -luteinizing -expense -##itidis -flavonoid -complexation -unpredict -##thoracic -lipoxygenase -##brand -bursts -tmj -##igo -exosomes -bam -##arine -chori -aldehydes -exactly -dystonia -h5 -thyroidectomy -printed -warrants -azo -tad -##opeptide -subdivided -##olis -headaches -speculate -vietnam -202 -burk -##gut -mcc -ctc -##uated -pneumothorax -helminth -hydraulic -foetal -transcatheter -billion -lectins -biocomp -attenuates -##ympathetic -zirc -diuretic -trichlor -dba -unambiguous -administering -retaining -##125 -translate -ancest -transmitter -singleton -tic -airborne -glycan -suggestion -##mas -elite -methamphetamine -minimized -##84 -gleason -##icated -principally -vn -remnant -odd -fasted -epoxy -unsatis -##ille -##ellosis -##word -strokes -thro -needles -hyperthyroidism -ivig -rubber -ovalbumin -ator -foraging -qualified -noncoding -lose -fun -##enh -oscc -##omn -sinuses -thrombolytic -synergy -encounters -codons -##tons -imprinted -##adel -serogroup -##brt -##ticism -succe -pz -glucosidase -ultrafil -attract -unt -dysfunctional -rumen -##rosine -urtic -osteoporotic -inequalities -##rein -171 -hyperv -skew -gpx -interacted -regressions -cbct -enterococcus -173 -passing -transection -nowad -nonster -halogen -etching -diversification -criminal -mite -##asant -indol -visco -nowadays -##epines -blunted -##uitry -migrating -unem -gastroesophageal -escc -liposomal -##ifiers -teacher -vertebrae -##mr -##acillin -bold -cannabinoid -advertis -##icit -accidental -##entin -morbidities -amplic -incidents -exertion -educated -##enzymes -##field -efferent -denture -##prof -dre -reasonably -leaflet -conflicts -##acies -##gamma -proceed -quadriceps -pathologically -##eve -malays -thermo -##oxamine -osteocalcin -rotator -competency -##avirin -lax -##iensis -##atech -atlas -disequilibrium -paths -exceptional -##eximide -quart -colorimetric -acrylic -##ecoxib -exocytosis -ptp -##ofil -neurophysiological -bever -closing -volunteer -salient -psycin -steadily -psycinfo -ossification -plastid -##uman -phenanth -arguments -mcr -foramen -company -rfa -yog -1976 -##regn -neonate -psoriatic -##robenz -##utions -pta -cycloheximide -abor -##ostat -concordant -pointing -unified -ovine -isopropyl -##elected -gdp -##icc -cd20 -norw -hatching -threefold -cres -myogenic -##olones -cancell -predator -greenhouse -##like -206 -chewing -wells -quaternary -tank -inqu -##ilance -1977 -algal -##etra -ecz -finland -##opamine -socially -hardware -dar -hepatotoxicity -payment -osseous -ready -incompletely -membranous -fishes -gun -hscs -fort -immunocompetent -pigmentation -adriamycin -leader -tremend -##ethylation -cascades -polyphenols -msh -ridge -##oprotection -atorvastatin -encompassing -diclofenac -neurofib -panc -questioned -blank -022 -emphasizing -##osurgery -tags -waveform -##92 -exacerbated -026 -##ydig -sephad -diaphragmatic -##onomical -staged -oxidizing -enucle -##bia -classifications -faecalis -l4 -sephadex -graphical -##etary -duty -biphenyl -exhaustion -fluoroquin -##aki -##itories -intraclass -pellets -##argin -##omycosis -mans -##cing -amoxicillin -gynecologic -uncovered -##oca -opn -bark -fiss -##omandibular -uremic -nuclease -nearby -reductive -exploit -sas -wilc -ribavirin -##ticle -snap -combinator -svr -##gc -staphylococcal -heritability -primord -representatives -aminobutyric -dnase -##alg -stature -compartmental -cbd -gill -eosinophilia -chemoattract -invasiveness -turk -fibros -##oencephal -publicly -##vd -mong -mra -##holder -apple -trypanosoma -##olysin -leaching -saudi -overproduc -##electronic -cronbach -clues -calcified -therapist -got -bout -kall -scars -##ql -condyl -fusarium -attainment -cassette -quinone -dext -184 -evidences -aliph -leydig -saphenous -vice -msi -absorpti -187 -multicent -decompens -pepsin -kong -condensed -mesoporous -##odopa -kits -markov -reversibly -##afenib -leptosp -##alline -mush -agglutination -isopren -##othing -phenobarbital -homeless -nts -##fly -bubble -interpretations -##oinositi -##arity -observing -lymphadenopathy -##awa -##etization -laun -nag -societal -separating -detectors -##astig -mountain -braf -libitum -cd28 -aeg -risky -constantly -letters -pumps -##plicates -recap -interrel -transaminase -##opathological -##antit -assemble -kp -reabsorption -affiliated -mycobacteria -histomorph -operators -##axin -zoster -cfs -kel -##epid -cpa -kat -##acrimal -perovsk -classifier -proliferator -probnp -cmc -culturing -economically -sarcoplas -arres -counties -disinfection -serr -expenditures -pcc -5000 -infliximab -##oco -lobectomy -binocular -##formation -hcmv -beverages -##ulline -associates -##otrypsin -haematological -deoxyrib -purely -cheese -ban -elastin -##ogenin -backward -egcg -psychiatr -hsd -thermally -disrupting -##alo -dural -nervosa -fermented -bacilli -calpain -anesthetics -sers -##occl -etoh -208 -eccentric -ornithine -hyperg -psychomotor -ribosomes -sacral -035 -latit -dysfunctions -sectors -##oped -pericard -entail -edition -tetrah -##iparous -constitution -anticoagulants -216 -##oresp -##itives -hindlim -ruled -discussing -dorm -holistic -gastrocnem -aliphatic -emitted -recreational -compute -antih -wille -pls -##aneurys -endocrin -##ofemoral -manuscript -##ellae -sharply -chemiluminescence -weal -##idomide -voiding -ultrasonographic -obsessive -##appab -##ronchial -##eptin -hmgb1 -lakes -shaft -ketone -ambiguous -tas -genesis -##fin -clay -wel -lesioned -##mn -vincr -consortium -simplicity -coral -absorptiometry -bacteriophage -##osterol -mesothelioma -damages -voltammetry -gout -##yelin -exagger -p300 -3000 -seriously -ankyl -gynaec -chop -followup -carbamazepine -urgently -certified -##imethyl -lingual -carboxylate -masking -thior -saharan -330 -recognizes -kap -##anda -contaminant -##agens -##rows -dnas -outlet -##oreflex -impulsivity -withdrawn -b16 -colloid -inev -producers -##ubstituted -tca -spo -##anum -worsened -##yline -bloodstream -##tigo -##adap -thoughts -immunophen -punct -annular -##aggreg -contracep -inhabitants -rifampicin -provinces -tuberculous -fluconazole -##ocus -correlating -##arboxylic -defines -osteogenesis -digestibility -commens -insecticide -##ecia -##anil -##endym -spectrometric -##aired -academ -abdom -##edullary -runs -wilcoxon -fascia -lengthening -801 -##oo -heightened -combinatorial -whenever -anxious -ssp -graves -dti -deprot -poland -provoked -posttreatment -##ynch -combustion -abi -ada -levodopa -endotracheal -electrolytes -modifiable -maximally -galectin -distinctly -hexagonal -0002 -throat -cid -lanth -multicentre -zol -outputs -ocul -proxy -lub -##rb -extravas -pediatr -parasympathetic -##acetamol -vv -ortho -intu -##oviral -partnership -contraceptives -xylose -pect -stains -fibrinolytic -hither -gastrocnemius -rins -willebrand -vincristine -encl -238 -categorical -wise -179 -214 -loh -gil -styles -resolving -semiquantit -sustainability -##adish -franc -governed -shortage -eut -##wi -##adesh -##analysis -hitherto -circuitry -versa -##ults -##oxidation -##asion -genders -unemploy -pollutant -isotropic -209 -necrops -cryptospor -leis -loosening -fellows -##ocyanine -bidirectional -associative -arthrop -##artum -unmod -stations -##ald -antimicrobials -hexane -seronegative -practicing -##uretics -meiosis -##osting -adopting -texas -nephrotoxicity -pine -restorative -pale -##tz -193 -tetrachlor -redundant -sustaining -furos -coincided -landmarks -dece -##tistic -tryptic -##ussion -##oconjug -vals -biotrans -ontology -##agglutination -##iodarone -generator -nash -vitrectomy -neuroscience -hypon -handic -oncological -glycolytic -hypertonic -dimerization -identifiable -exceeds -physiotherapy -regenerated -##onide -manufacturers -devoted -ligase -virological -rumin -artem -photochemical -##whel -hps -attraction -##west -##actam -psp -##trained -pheochrom -relatedness -feline -grp -benzodiazepines -complained -schistosomiasis -sio2 -##osyltransferase -hnscc -russ -carotenoids -lymphadenectomy -ultraf -generalization -##aro -click -splenocytes -##yal -spasm -chelating -nearest -##ariae -##side -rainfall -sst -##ateness -clp -nong -hypercap -anaplastic -##f3 -furosemide -microch -tlc -diamond -##obenzene -chose -intervertebral -airflow -proliferate -pupil -##une -eventual -phonological -##osperm -fle -synucle -permeabil -favorably -predispose -discordant -tma -caucasians -noradrenergic -reacts -drinks -##trac -herniation -official -overwhel -mmse -leisure -citiz -##kg -##ophor -metastas -horser -##ophthalm -pige -referring -prepro -streams -returning -##so4 -wait -och -concrete -heifers -microfil -diuretics -coexisting -##ume -esbl -carriage -toxoplasma -circumferential -raph -transthoracic -##odynamically -spines -##bra -homologues -vulv -compressed -rag -bifid -overdose -##osc -cleared -##aut -heterosexual -##otecan -nicotinamide -colonized -##erh -##ref -cyclization -internalized -beck -closest -233 -magnetization -broken -implicate -sarcoplasmic -compete -##pregn -facet -simulator -##83 -geometrical -p2y -horseradish -pmma -batteries -harbor -serology -##ophytes -##obacterial -huvecs -##umann -carcinogens -impregn -preferable -complementation -dcm -##omon -1990s -aperture -cardior -215 -paracetamol -empathy -concentric -outstanding -itp -transdermal -##eri -gard -dosimetry -intramedullary -myristate -regenerating -207 -contrasts -##oplasmosis -forec -myopathy -refuge -g4 -femt -attribute -hypotensive -barc -bq -nations -disadvantage -annotated -achieves -imidazole -navig -gingivalis -crosslinking -t1d -noncom -diol -##fas -##una -223 -urethra -##ofer -hindered -htt -resonances -fluoroscopy -cim -enum -hyaluronic -##oxycycl -phenytoin -realize -##erae -##urized -mptp -sleeping -epididymal -persistently -cdi -synuclein -##ostom -##alcoh -discipline -osmolality -linkages -contacted -toxicology -derivatization -harbour -##ometers -tubal -graduates -footpr -john -ost -##lt -##lyp -biomolecules -tram -##ubertal -toe -opposing -hbc -##iferous -intratum -natal -##oprol -tnfα -blots -cooking -clades -cep -receipt -diversion -##ophene -antral -antithrombin -request -reprogramming -awa -##ohem -cyn -gastroenteritis -glycosides -##ikrein -##101 -1200 -performs -increments -bell -17beta -##annab -221 -anesthesi -scrap -##took -skilled -stoichiometry -##ubercul -##bp1 -tunneling -locked -unsatisfactory -a23 -fic -css -dynamically -aberration -viscoelastic -308 -amplify -isoflav -antithrombotic -##urus -shelf -bronchoscopy -expresses -obstetrics -entrap -lncrna -adoptive -benth -snake -probiotics -agitation -n2o -slopes -wires -##adenosine -comprehensively -##avian -h3k -vire -migrated -myocarditis -211 -housed -hyperpolar -inr -unrecognized -pigments -bronchodil -##130 -##eb -340 -cd11b -##oeba -nitropr -hydatid -pvp -paradoxical -##ap1 -##othyron -##anter -nhe -##etrically -##aka -##entation -daughter -550 -correcting -##romatosis -mars -undertook -mitomycin -##pm -synthesizing -ces -nitropruss -cu2 -##encephalic -diminish -tables -##d6 -varicella -bronchitis -##len -lexical -palmitate -aborig -durable -##ochrome -muss -remediation -microscopically -nsaid -rsd -##iceal -dizz -asympt -10th -meetings -peanut -conduit -illicit -pparg -##ploys -chimpan -amiodarone -lycop -ligaments -losartan -ruth -rearing -nonsteroidal -ambly -qd -glutar -borrel -pva -kernel -remember -approaching -stir -shrimp -amaz -assesses -##odine -windows -broiler -sulfon -nitroprusside -centi -obscure -##avers -##ounder -##ogle -burning -constructing -##oride -qualities -leukotriene -slowed -polluted -ril -fret -institutes -194 -hbeag -cart -southeast -polysomn -dmd -##olide -vcam -hematoxylin -artificially -v2 -##implantation -##essional -prospects -ttp -alop -evans -stainless -seldom -uh -nanocomposite -phleb -terminology -##ott -##acyclin -##ibrate -##orthy -189 -inert -ccr5 -stenoses -##imolar -##okine -euro -##atases -hydrothermal -hardly -isr -##users -priori -wales -transs -##ystem -lux -buck -infecting -lhrh -##gest -290 -##etom -rendering -tape -##cortisone -##hai -autoantibody -1974 -##onary -enterobacteriaceae -029 -admix -##yelinating -glas -communicate -functionalization -rater -metaphase -yrs -phages -sweat -##dg -photod -relational -##erged -centred -requested -orific -tack -191 -unip -anastomoses -debil -226 -032 -hierarchy -diph -alleviated -ivc -ricketts -nitrous -nh3 -##anting -pheromone -autum -gan -##ologr -##timulatory -##ern -macaques -##ingual -economical -oncogenes -ca3 -emulsions -keratinocyte -a1c -extrahepatic -infin -membered -dissipation -ez -glycation -phytochemical -h2s -certif -##tigmine -thumb -##enafil -mentally -##icals -distilled -beij -electrop -discern -economy -nets -020 -pitfall -belg -anticonvulsant -##ensives -##agers -##ultural -##istin -##ounted -discriminative -##±3 -carol -draining -accommodate -unmodified -competencies -##itrate -employs -pyridyl -bioreactor -degran -momentum -d4 -argen -unusually -starts -roman -metabolically -lastly -##cv -effusions -fossil -##ecd -##ectants -advocated -rounds -contradictory -carers -microelectro -continuation -##bank -##itative -schiff -dissimilar -##han -looked -optimally -delineated -cime -zr -shad -metab -assimilation -##lich -meq -rgd -palladium -solv -preca -basilar -squared -microdialysis -glucuronide -lis -meningiomas -muscul -apr -nonsignificant -drives -psychiatrists -##vinyl -cones -chair -trehal -fuller -##plate -myd -##accum -nanocomposites -prohib -debated -incisors -unpredictable -stance -milieu -##trig -##uel -##ein -##erally -vad -ercp -antioxidative -capacitance -urod -##vic -subclavian -micronutr -unresolved -##omedial -optically -congruent -##apsular -cardiomyocyte -##olymer -##ifl -casual -disagre -##nb -astrocyte -##lass -cimetidine -seat -longitudinally -justify -##s1 -nanotube -sclc -cmax -ggt -##ylaxis -hur -phosphoinositi -upright -psc -reservoirs -stellate -castration -bcc -glc -sarcop -aflatoxin -adhere -1980s -confinement -##onent -##fully -##osylation -##rednis -workup -##usate -upa -concludes -weighed -##ulence -eczema -nasc -incentives -iner -##rotal -218 -turning -modulatory -abut -doxycycl -##yly -mdct -pick -hospice -putamen -justice -##aban -deteriorated -oligosaccharide -ltc -cognate -biocompatible -##adecan -grazing -##oxygenation -seroconversion -fatality -##ynucle -deceased -sprou -freund -staphylococci -eo -##olem -file -antimal -##years -##uin -printing -##oprim -disabling -cbs -##days -spons -hydrocortisone -confid -##kii -interch -anode -crystallography -##ellulose -ltr -wealth -went -anxiolytic -xx -merely -grouping -inhibin -misuse -fundament -calcine -##orel -##ceptive -##buminuria -hypercalc -irrele -iaa -##orhabd -positives -immobil -inflation -##usor -want -astigm -systemically -hemip -dav -unwanted -predators -descent -appropriateness -##tervention -dms -sterilization -trou -##utter -sars -clamping -demyelination -2s -hon -photons -proteoglycans -phasic -cooh -uridine -glutaraldehyde -4a -sliding -036 -mns -enterocol -antenna -##pv -nonsmok -timed -scaled -epc -workflow -taq -dollars -litterm -piece -denatured -045 -##guan -gtpase -pleiotropic -chow -prevailing -wanted -xylan -profoundly -##tory -thinning -shoots -alcoholics -exhal -enj -calcineurin -triad -fits -##600 -deployed -##ispers -##oons -games -310 -plasmonic -ccl4 -analyzes -fuzz -mutually -yearly -##oidy -##ieving -##omyosin -2n -aneurysmal -doxycycline -##angiogenic -##othyronine -heterochrom -names -crispr -metatars -mglur -hpr -adsorbent -##parametric -cholester -nebul -298 -ctr -varices -anaphylaxis -rfs -atherogenic -##gtt -temperate -locking -epidemics -epitheli -deafness -legion -4h -popularity -dizziness -##itz -fragmented -unfolded -protonated -##osystem -hyperplastic -lon -##ilyl -exchanger -abo -pst -##opancre -wav -##reek -254 -enantiomer -chloroplasts -haemophilus -oligodendrocytes -##muir -floral -avers -hydroxytryptamine -communicating -complexed -pieces -##ophenol -obstacle -instantaneous -##μg -alve -crosses -##fast -benefici -troch -esthetic -344 -methylprednis -xa -recanal -blastocysts -cystatin -waveguide -tamp -##aba -noxious -235 -##otomies -##apillary -langmuir -opg -jejuni -##ysts -authority -lyase -pharmacist -exaggerated -maternity -tympan -flame -langerh -aj -##oreactive -upward -a23187 -##othermal -dinucleotide -quadratic -relaxed -coils -observable -pds -##phig -ingredient -##azem -icsi -hemispheric -hemispheres -antine -pbp -chemoradiotherapy -##oxides -tyrosinase -parac -bps -##elective -victimization -communications -##drs -iddm -colp -##ecrosis -##angeal -cd10 -##iformis -##esters -allyl -switzer -burdens -acidity -chymotrypsin -adrenocortical -mastitis -villages -wherein -locate -exped -entrapment -derivation -mst -callosum -c8 -##rem -tremendous -haemolytic -snow -stopping -##xime -langerhans -centros -pemphig -budget -t7 -psd -##ocardial -qtc -oropharyngeal -##coagul -catabolic -switzerland -antinociceptive -diagram -reflective -cyp3a4 -rejected -facts -quadrant -β2 -psychotropic -pna -##itazone -irrelevant -persisting -cfa -polyamine -mastic -##iro -cyanobacteria -zn2 -fusions -transactivation -cathode -qi -astigmatism -##trum -pruritus -##osse -granulomas -##imers -stigm -meningioma -std -046 -##oresistance -031 -perineal -spectrophotometric -verm -##orr -ligated -familiarity -efs -##foot -anabolic -miscar -s6 -nab -ssi -zik -responds -electromyography -##pharm -computationally -undesirable -viz -##ysm -##rett -conservatively -listening -exposing -##adjusted -prokaryotic -sdf -##at1 -##arf -florida -hypocal -histones -mz -dihydrop -aunps -##erus -campaigns -prostacyclin -##ethal -threats -ctla -concentrates -clad -ecules -exponentially -zona -pond -methylprednisolone -junior -turns -##r3 -immigrant -circle -hirs -evolutionarily -cilia -##assemb -copyr -vasospasm -imported -corneas -gonadotroph -hill -endorphin -##antigen -##entgen -sirt1 -##accept -readiness -dxa -passages -##tina -bacteriological -probed -##hou -micelle -pco2 -alloys -2019 -##h4 -##isco -##acid -##81 -theme -neuroprotection -brings -curing -dht -actuarial -meso -irel -neiss -##olization -##board -037 -alkaloid -311 -homologs -##gow -nominal -fasl -manure -beijing -police -##iciting -refraction -trast -##dine -myoglobin -copyright -spirometry -##rodesis -valine -##03 -bmscs -ireland -prematurity -temporomandibular -lvh -insecticides -nystag -myotub -dysplastic -matches -establishes -retinoblastoma -##enet -emuls -humeral -e6 -machines -slide -##tists -agglutinin -antid -corrections -cm3 -discriminated -herm -aptamer -homod -chondrocyte -autumn -##tiazem -375 -keratitis -##esthesia -rw -unic -ipf -quartz -optics -releases -fumig -traces -tee -ch4 -##serine -213 -asians -electrosp -aspartic -##beta1 -frank -demyelinating -hemostatic -applies -comments -##opharmaceu -034 -olanz -##ao -discoveries -dorsolateral -alongside -##breeding -posed -defl -sao -##hydryl -##ropic -ancestry -separations -gang -behave -cages -segmented -acinet -cholestasis -merits -multilayer -carcinoid -agnps -monod -##ocations -nanofibers -##arctic -carotenoid -##ureth -distension -##oglobulin -##ront -##bryonic -enal -vow -ministry -neuropeptides -listeners -saa -stringent -metabolomics -038 -trastuzumab -olanzapine -≥1 -destin -zeol -hav -unlabeled -##child -cholerae -1973 -mmr -palatal -tumoral -fuzzy -##dy -##issible -fibrinolysis -##imidazole -distract -##ouses -bibli -kyn -##transp -homogenate -mch -acquiring -activin -synch -##uccin -speck -##biased -dibut -congestion -alopecia -bee -trimethoprim -relieve -dlbcl -##haem -dilute -rhodamine -##havi -subn -unil -obt -wildlife -gonorrh -rubella -cina -##obronchial -hypoxemia -##phosphatidyl -##biotic -spending -modal -##x3 -ceft -noticeable -explanatory -tcs -haemodialysis -myasth -guest -##vin -potently -aas -ferro -033 -reads -screens -diltiazem -ruthenium -asynchron -tort -##otherapies -zoonotic -protozo -hybridized -plating -hco3 -bland -dermatology -simpler -grounded -acinetobacter -citrus -pace -inequality -intraoperatively -##ymm -##arcomas -glasgow -ppp -mcm -cyano -logic -cardioresp -neurochemical -urokinase -stalk -##oprolol -melanocytes -malate -photoluminescence -##my -specialties -dk -tmd -unresectable -translocations -insec -wavelet -##umber -048 -risperidone -cardiorespiratory -sinusitis -pvn -pumping -convinc -predation -##ugr -organelle -solvation -cystectomy -##ipenem -subma -chemoattractant -##can -ddt -calorie -exhaus -##prop -##omorphine -flatten -vertigo -congeners -roentgen -##osteal -4000 -anonymous -exploiting -kan -apd -sentence -##angitis -##imeric -hypoglycemic -pbc -ell -transparency -dpph -extravasation -terminally -ipa -tace -dnp -##etomidine -argument -gabaa -loose -unbiased -232 -pacap -vz -norweg -##atia -tpo -icg -arthroscopy -aca -organizing -insemination -##imumab -marks -formyl -saturable -exchanges -##illed -developmentally -hematuria -asphyx -responder -xer -implied -##isen -enthalpy -neoin -aband -##ila -appreciated -tmp -presump -##abl -bioge -biotic -hyperint -cystein -##eto -fluency -bent -probands -lymphoproliferative -##renorphine -photosensitiz -orex -valley -lov -amil -224 -quadrupole -##cnts -isothi -avenues -##olaryng -##omyelin -hemic -permissive -connect -intracereb -ripening -crust -termini -reside -##repres -membership -hyperpolarization -quoti -aluminium -##oproteins -isozymes -coincident -nystagmus -##ermined -stimulant -omeprazole -fcr -superimposed -turkish -sre -rises -##openic -maxilla -anf -lipolysis -corrects -##etitive -vo2max -##osts -hindlimb -##chemic -unresponsive -##indole -chyl -nanoshe -photocatal -subspecies -sativa -undiagnosed -shrna -asymptotic -cau -endarter -dibenz -contributors -##ho -argon -##wood -nw -basophil -seeding -420 -subtotal -slaughter -unstimulated -unselected -cochlea -tia -callus -s3 -##gers -##insically -##ocere -phosphates -graphite -poster -apa -octan -pursuit -##atide -##ithin -habitu -estrus -granulation -wasting -unsp -diary -influential -sz -##inoids -cx43 -inflow -exocrine -hypok -tp53 -complemented -habitual -athle -speakers -##diagnosis -##ospinal -carrag -##flora -verb -benzoyl -##iplatin -immunod -pharmacologically -sows -naphthalene -aboriginal -azide -aerial -microbiology -uva -decide -aphasia -hub -carbapenem -phthalate -prod -iodo -intrag -recurred -compelling -phac -xps -biceps -streptococcal -pao2 -gavage -detrusor -fasci -enalapril -regi -nonsmokers -intraluminal -pmns -##ollagen -barrel -elicits -detects -##oposterior -mate -1950 -pseudoaneurys -##eor -orph -##kat -fried -weighting -propos -pps -manager -eliciting -cannula -##tert -neurobiological -enjoy -powered -camer -##anar -buds -047 -photoreceptors -pipeline -##iry -childbirth -##imentary -##orbital -finnish -pgp -##adiaz -butanol -##iiod -capsul -autistic -##oacetic -650 -conspic -innovations -sulfated -dissatisfaction -manipulate -interrelations -xy -octre -downward -reapp -carrageen -nitrog -##othione -mimetic -reef -blend -##car -anaerob -mansoni -truly -ignored -parturition -##acral -chemosens -strictures -polyvinyl -fischer -traced -osteoblastic -at1 -realization -amiloride -msa -curved -solutes -escalation -comet -##abetic -##phone -cryopreserved -dialysate -uniformity -##mar -extraord -##urface -attempting -##obiliary -destroyed -##lycerid -##oglut -underscore -triiod -haploid -durability -pdms -caregiving -overcoming -infrequently -c7 -kallikrein -tetrap -biotransformation -stimulator -lysate -scarc -thrombi -antich -contrasted -femtosecond -tolerate -epoxide -getting -tracers -glucosamine -commentary -hologr -peptidase -##opterin -bronchiol -porphyr -myocl -contracture -lacrimal -densit -ccc -##ophyte -walled -sentences -dub -##mph -##ismus -citric -mx -cinahl -kras -estrous -antiinflammatory -embedding -##annel -nascent -contiguous -huvec -barrett -ambig -oxaliplatin -##admin -phyto -equivalence -virions -039 -gwas -≥2 -compliant -accumulates -##reen -dobut -##king -tuberc -##odynia -odn -academy -tz -##d4 -365 -actinomyc -seventh -urological -parkinsonism -##itical -cylinder -hydrated -laev -bleaching -hatch -inducers -baumann -##roventricular -micellar -##6a -microinjection -vascularization -accomplish -##osseous -217 -monolith -imperf -##western -anca -g0 -maxillofacial -unadjusted -divisions -meniscus -unaccept -waveforms -niddm -extant -integrates -manufacturer -ops -##tices -rtms -sab -zip -rls -exo -##entioned -##gly -ultrac -pegylated -thorax -##rum -formerly -sulfhydryl -yb -##ozoites -norwegian -239 -dobutamine -fibromy -iugr -kines -hay -stressor -pparγ -##urge -opa -semiquantitative -##eping -cch -palpable -##mb -pfge -universities -intracerebroventricular -locoreg -infarcts -stereotyp -moderated -laying -argues -submaximal -tig -sati -apomorphine -geor -ppt -gob -fission -tps -homogenous -pentobarbital -e7 -dnmt -genotoxicity -accelerating -metagen -##elt -reposition -chile -ultrafiltration -story -radon -##osylated -sphingomyelin -aseptic -bridged -tracheostomy -raphe -5a -submandibular -hispan -mcg -popliteal -imping -mainst -##isy -equilibr -##orespons -pellet -##unts -cultivar -unload -##erts -##biosis -nil -anchoring -ideally -localisation -dichotom -##feri -amphiphilic -micromolar -advised -thicknesses -##encephalon -hypogonad -##ronic -azath -##olstein -candidiasis -fviii -lmp -csc -##izability -accommodation -nonresp -mace -storm -##opar -##aks -microbe -npv -c16 -faeces -hydroxybut -ptca -hematopoiesis -chlorhex -##ype -paramount -##lorinated -380 -deeply -##illes -cq -##ighter -s1p -t0 -##igan -thermost -cnts -##ildenafil -##alcoholic -thf -opto -mantle -directors -imprinting -herbs -275 -##uis -responsibilities -mn2 -topographic -bees -wisc -3b -radiolig -nitroso -epidermidis -carib -immunologically -nnos -##gp -marriage -ptb -hypoglycaemia -gay -oxidored -propria -sink -chromophore -##ynyl -incompatible -azathiop -solubilization -rarity -neuroh -7th -neisseria -crime -localised -rebound -fon -irf -mailed -hcm -seedling -brit -pml -oscillator -heterodimer -neutr -biotechnology -gase -neurovascular -naming -tga -transesophageal -tubul -flick -reactors -photoin -reinforcing -perovskite -chlorhexidine -wish -oncologic -extracting -injectable -fecund -neurology -polyposis -gef -royal -initiates -retire -zw -favoring -demethyl -##oniaz -iranian -patterned -kyph -continence -##quinoline -##azoline -phosphorylase -bangl -##ths -achievable -dor -vagina -urgency -infestation -##reless -370 -##ouns -immunofluorescent -aneuploidy -periodically -deae -bangladesh -abortions -transmural -prazosin -##okines -oldest -anatomically -subdural -pathophysiologic -buffalo -midwives -hypertrig -##anone -manipulating -##eliness -biodistribution -millions -smd -intrinsically -480 -mites -hypoc -##inflammation -cem -aspergill -cushing -transients -contracted -##bec -digest -pitfalls -sildenafil -electrochem -hnf -uranium -colored -##i1 -adn -notew -counteract -electromyographic -piezo -instrumented -spiking -humor -pgs -##oton -sos -logical -##okinin -inferences -acetylated -avoids -##iet -spliced -identities -##oflavin -biochemically -nadp -inductive -supplies -debilitating -biomechan -tertile -bmc -nitroglycer -##methylation -maneuver -##pb -drawbacks -preg -homosexual -cholangitis -sider -##torh -thiored -228 -estrogenic -compromising -ed50 -presumptive -kcn -evacuation -convex -prrs -##ultured -fractal -defensive -indeterm -cadavers -photosystem -crush -isoniaz -##oduoden -sonic -##osi -annulus -azathioprine -baumannii -knowing -scru -denaturing -butter -diat -lymphangi -burgd -##κb -ict -ependym -seemingly -##uronium -balf -unbound -adapting -cba -##azin -phenotypically -controller -adenylyl -##omib -sorafenib -hypophys -lyme -##operfusion -radiologist -wrong -licensed -transvers -vsmc -clip -baroreflex -niv -tpp -runoff -abduction -burgdor -##ellin -heterocyclic -hydrolytic -repolarization -caen -##ogastric -prag -merit -gj -phosphatases -corners -ptd -pgi2 -##etus -manufactured -intensified -metabolizing -claimed -hypercalcemia -##omening -unheal -wireless -uncoupling -##centr -##endothelial -pach -##h2o -noteworthy -holstein -mof -google -##ucine -dichloro -cryptic -##orptive -potentiate -##yer -unco -ags -penal -relied -vsmcs -leucocyte -investigator -##professional -poc -beneath -sulcus -##bw -interconnected -nrs -pleomorphic -oligomeric -bacl -globular -burgdorferi -jak2 -##orhabditis -pns -transurethral -naa -calcifications -sulfoxide -n3 -degranulation -##ervical -court -heterozygotes -##db -##access -245 -hypertriglycerid -boots -characterisation -##adol -##rology -lett -meter -##cysteine -3beta -nci -mbl -shortest -##folate -isomerization -pyrophosphate -precipitate -progressing -tumorigenic -so2 -b3lyp -##aea -fetoprotein -##transplantation -tailed -##bear -purpura -trehalose -mission -##trid -dihedral -swab -##omethane -hcs -00001 -##trex -dac -utilisation -##ennial -##ostosis -budding -buprenorphine -lpr -##otap -glycated -ale -amend -occlusions -ki67 -geographically -spasticity -your -sexuality -underpinning -incisor -reinforce -chronological -##ran -##immune -phantoms -##1a2 -##arth -rivers -thanks -demonstrable -##etries -##sm -adjuv -hyponatre -berg -thyroiditis -agn -guarantee -singular -proto -commiss -##uma -mlc -biv -gaseous -vascularized -myelodys -##urities -classifying -connexin -visiting -ahi -rhoa -vivax -polyt -##af1 -fak -##idines -tan -042 -dystrophin -inefficient -##180 -phenomenological -recapit -roi -palmitoyl -##cription -pld -appeal -219 -convey -isoniazid -dysregulated -facets -assignments -svm -anteroposterior -##odin -transposon -##amyl -pgc -allop -washington -##ovi -rheological -##cel -club -embolic -##arabine -website -ay -k2 -halo -homing -##orient -collim -##rogenesis -serovar -ryan -consume -delineation -##ivirus -necropsy -##rosthetic -##ospermia -##burn -fabricate -euc -grounds -permitting -player -enzymatically -sax -ish -novelty -ssr -##iper -tut -instructed -village -##arche -unexplored -styrene -##balanced -##q11 -artemis -discontinuous -histopathologically -resins -symbiotic -peep -##ondral -categorization -transv -##urrent -043 -##children -anaesthetized -stillbirth -streptomycin -postp -afterwards -isothermal -##00000000 -valgus -sjogr -##bv -##n3 -genbank -ryr -##ectoris -nonex -234 -leucocytes -externally -abundances -##atids -pept -joining -nyha -##yled -achilles -pvr -neuraminidase -grape -urticaria -##ourished -certainly -##ometabolic -##ematous -microglobulin -taught -aversive -multicellular -##293 -##romegal -##osable -involunt -##activated -cd56 -##esin -albuminuria -contraindications -relieved -eliminates -##lc -architectures -certification -nicu -keyword -mpp -##ffer -transcriptionally -orifice -invading -cannulation -v3 -synchro -heritable -negoti -chat -esophagitis -##ensory -hgh -broadband -hydroxymethyl -ensures -tenderness -ncr -calcane -endodontic -sps -##ca1 -phosphatidylserine -sjogren -6th -cd19 -api -acetaldehyde -visibility -241 -shh -stomatal -octreotide -dephosphorylation -prick -hospitalisation -auxiliary -##igenin -rit -incar -innerv -neuromod -affordable -nanorods -hun -immunosuppress -thai -forth -##ochondral -perfusate -##tri -stn -acclimation -vitell -avr -##ocortex -scrutin -bats -##onol -midgut -bottlen -mmt -sensorineural -heel -telemedicine -aggressiveness -##radi -isoenzyme -incap -##being -glucoside -5s -coul -aii -##ternal -classroom -##oting -immunoassays -microorganism -directing -morphometry -urease -diphther -myoblasts -##opolymer -trpv1 -rotations -postulate -##acyt -news -locoregional -breakfast -sulfonyl -frustr -049 -progresses -mesoderm -##osclerosis -##hom -valued -ppargamma -cyto -##ollen -fibril -##ells -##actyl -epcs -##elong -calculi -prepubertal -proceeded -hernias -invertebrates -pakistan -##map -tetramethyl -bacul -immortalized -wellbeing -tdcs -biosensors -omp -xp -##amoeba -taxonomy -hispanics -vene -bpm -##onins -pectoris -hedge -##owed -deuterium -decel -subdiv -ip3 -fimbr -##d3 -nitrophenyl -thall -slip -sant -maxima -##zomib -##othi -229 -##hog -deaminase -cb1 -ambul -miniature -conventionally -imipenem -interferes -##verted -magnitudes -##tress -triterp -fts -##omplex -conclusive -wd -##avicular -adhered -clarithromycin -polyg -##ucting -lactis -rage -##porter -1966 -##fluorescence -laevis -##pot -##osh -molecularly -rectang -ihd -vasoconstrictor -##ivudine -mcd -kilob -ancestor -quantitated -polychlorinated -dates -whey -3β -2m -phosphoinositide -amni -pheochromocytoma -golden -approx -##fts -dosed -exf -say -##orylated -ebs -minnes -##amido -predefined -##cc1 -penetrate -spondylitis -reliance -terp -anoxic -jew -adrenalectomy -##reatment -acrylamide -##azide -tbars -##lock -titre -##othec -offenders -sunlight -##position -practiced -rectif -iap -synchrotron -##itil -huntington -entrance -modelled -therapeutically -##apore -microsurgical -overwhelming -##itably -myocyte -##kl -ricin -arctic -dsp -##937 -beating -biotinylated -p50 -##endazole -tst -penis -##electroly -default -jurkat -20th -interrog -irrig -dipyrid -antiphosph -##coprotein -panels -migrants -cardio -ploidy -##kinetic -##erson -s100a -cryptosporidium -lactofer -replicates -framing -myelinated -lysophosph -phosphatidylethanolamine -caenorhabditis -fap -##isk -glycaemic -##rote -retirement -##apto -sons -orches -sclerosing -ssa -citizens -equivocal -041 -crystallin -amoe -compiled -isotonic -##delta -progestin -##lessness -editor -acromegal -radiosurgery -bdi -og -accessed -##retion -minnesota -money -bca -depicted -fingerprint -mindfulness -rug -restrict -##mics -borders -carolina -displacements -heavier -mdm2 -gss -deterministic -lubric -extensions -athletic -dentition -##tiveness -beneficiaries -nephron -hypnotic -caf -myb -promptly -##orphism -dmf -bulky -modifies -confluent -##osities -##itrite -ogtt -epithelioid -vzv -mmf -tones -controllable -##olinergic -avium -fibrillary -glucone -##ographies -drastic -anteced -pdi -overlying -degenerate -fibromyalgia -viremia -qa -fragility -##ophytic -mta -##occlusion -cour -ceftr -236 -negativity -sectioned -mdma -defenses -flavor -laminar -mainstay -facilitators -mine -##2r -urodynamic -ner -forel -nadir -##cryst -clath -17a -sedative -compass -##missions -bioassays -fluorine -ginseng -tampon -ccd -stump -mthfr -ui -universally -gynecological -indexed -nf1 -entang -serially -248 -agend -ceftriax -urs -endarterectomy -pleu -hypothermic -administer -fluidity -riboflavin -interspecific -thioredoxin -extubation -##onecrosis -enhancers -igh -herbicide -town -draft -floating -acknowledged -##alasin -kre -f3 -matr -nonrandom -amalg -##acholine -originates -immersed -gm1 -mehg -eigen -replacements -carboxylase -##ifferentiation -##ystitis -youths -endocardial -##ostigmine -cycload -nim -lactone -##opram -corroborated -egr -##fv -supervis -##romed -##odomain -arte -enterica -##3t3 -antidiabetic -##behavi -sid -mbc -impurities -adenoviral -##oproph -indeterminate -perone -strugg -landf -pear -unavail -lyophil -pant -##arius -refine -syringe -##oning -##rice -##italopram -cory -hapt -pthr -antimalarial -##arded -amh -##welling -ngs -adventi -##tite -usp -##afted -325 -rps -perpetr -caspases -appreciation -genu -strab -hypermethylation -##idian -juxt -##s2 -kup -carbonic -tracked -kiss -meteor -intratumoral -accreditation -##antes -trying -preload -perforated -party -blasts -quartiles -fractured -disinf -singapore -antitr -ductus -nonhuman -dioxin -cholecystitis -repairs -door -lifelong -aro -swollen -modifier -proceeding -hba -mos2 -ceftriaxone -humerus -offs -mutagenicity -mediastinum -potencies -willi -ht1a -latino -estimations -diffusivity -##atial -condyle -##tilb -##oronary -fcs -elaborate -abbre -weaknesses -b4 -cus -proprio -##ureter -##anserin -craving -preparative -transfused -cecal -##n4 -##individual -##tigens -saccades -kyoto -racemic -cholecystokinin -sorted -buffers -cach -impr -metacarp -balancing -##±4 -phylogenetically -johns -interphase -scleral -inception -##stalk -markets -isoenzymes -dsdna -sgc -p75 -##aploid -otolaryng -asthmatics -landmark -exceptions -undetermined -tos -readmissions -##ohep -nanotechn -macron -##baric -explant -prehospital -demethylation -sulfameth -##ih -quadruplex -##acoustic -unprot -exercised -hydroxyvitamin -abbrevi -##jd -seeks -encephalomyelitis -duplicate -protonation -##iors -glasses -victor -##bos -synchrony -243 -coadmin -fra -shigella -##ocalization -tcc -neurofil -aforem -metallothione -atria -liability -aforementioned -angio -030 -odc -kenya -microflora -mdr1 -coales -anoph -##agal -duck -heights -maltreatment -marm -cholangiocarcinoma -##antigens -##oporphyrin -##ethane -044 -amyotrophic -##entilation -retinoid -1972 -pwv -oncologists -mend -##ypti -##ocentesis -colomb -mvd -hedgehog -##win -##ostal -hyaluronan -hallucinations -cdh -appendix -nanog -131i -lacun -rbp -##rowing -4b -orthostatic -gus -beverage -227 -##apical -gynecology -cholinesterase -homozygotes -##odyst -yers -15n -##126 -incarcer -fbs -rer -##ylv -telev -concerted -##adaptive -powders -hyponatremia -zh -cords -interindividual -shunting -vigilance -audio -astrogl -ferrous -242 -tall -brucella -hib -thiamine -exceptionally -1ra -viscous -symbion -flare -##oplegia -proficiency -keratocon -##enoids -##the -stear -ringer -unhealthy -##orbed -incidentally -hots -##aters -quail -gp120 -##arterial -byst -alkylation -gdnf -ionized -##alkyl -##onscious -##apsules -##nu -##erule -##anolic -cholesteryl -nitroglycerin -rolling -##aginous -petrole -pnp -##cement -humic -hyperoxia -pga -fluctuating -tfs -coiled -##ovulatory -rainbow -##iaa -spermine -exploitation -meniscal -##standing -emerges -tsa -log10 -innervated -inactivating -microalbuminuria -attenuating -bootstr -spectrophotometry -broadening -flush -##burgh -##duced -accelerates -##men -archaea -##renic -partnerships -##oremed -fecundity -##ospatial -levo -nucleophilic -lrp -##506 -cinnam -luminance -alfa -subgen -lacc -adjuvants -252 -ici -pread -seropositivity -##bearing -fron -nonl -hyperbaric -cardiogenic -cmt -##iclovir -cardioprotective -##hard -##critical -osas -inactivity -bottle -azithromycin -transcriptomic -scleroderma -nomen -advocate -hourly -stokes -##opathogenic -incisions -errone -cyp2d6 -carboxymethyl -clarity -astrocytic -neocortex -bacteroides -adenomatous -cci -recanalization -disks -densely -##olipin -lfa -nanowire -dexmed -methacholine -tetramer -petroleum -asymmetrical -fibrillar -##eight -inaccurate -exhaled -1970s -##othermic -fe3o4 -clind -ketones -runners -pcm -magnification -vacuolar -dipolar -antarctic -cine -synonymous -hydroxyproline -prevalences -pragmatic -hysteresis -triangular -glur -shortcom -languages -crosstalk -syntheses -polyneu -shortcomings -assault -##ecies -rheumatology -##arietal -acetylgluc -elaborated -bend -dcis -mesenchym -mechanistically -##exy -replicating -hcy -thicker -temporarily -dyskinesia -dexmedetomidine -chb -intraves -shuttle -viewpoint -privacy -sarcopenia -decis -postex -tgfβ -##oblastomas -eur -imipramine -##reshold -cytolytic -cargo -≥3 -##cb -ictal -##igel -stoichiometric -##ongru -lncap -cd1 -multifacet -sns -thiols -misdiagnosed -antiphospholipid -trus -furn -255 -##ecium -fulm -mitigation -benzoate -##116 -rnfl -u937 -acceptors -11c -anchorage -bisphosphonates -involuntary -rx -overproduction -ryanodine -chagas -absorbing -specialization -##azid -calv -multiplicity -##riers -##etting -breakage -shielding -formulate -questionable -##outs -clindamycin -edible -pione -##agle -metamorph -advancements -##trexone -nsp -actinomycin -ipr -crowns -implicating -dentine -recalc -demineral -celecoxib -##ytoplasmic -singly -quotient -soccer -mold -internally -radioiod -multifaceted -isth -operates -electronics -oocysts -##epi -##ohyd -externalizing -comfortable -russian -nymph -epilept -##architect -sce -##erea -aedes -nms -dehis -jointly -syncytial -infinite -endos -isozyme -##osmotic -covariate -isomerase -manufacture -##esartan -259 -##onated -casting -brand -schistosoma -fumigatus -bifunctional -##fused -littermates -0003 -photolysis -crab -ohda -##ifolia -strengthened -darkness -michaelis -imperfect -polyaden -rech -##inately -aerosols -##eland -mushroom -##cessing -pressing -vagus -snails -##h3 -##nts -265 -multiplication -gamet -##astigotes -iri -inductively -##ivial -##iptyline -condylar -plp -gig -##rotoxin -columbia -ncc -shel -intervening -nomencl -pend -borte -##reported -parall -tricyclic -cbp -suspect -meningococcal -peptidoglycan -tetrod -##fu -##acological -c1q -##aval -incongru -phagocytes -hac -emergencies -denitrification -bortezomib -##lational -neuroleptic -peroxyn -endings -interdep -cst -diacylglycerol -simian -enth -biob -1s -transvaginal -fourfold -blockage -retains -lactoferrin -nem -neuropathological -neuroinflammation -dyslex -dsrna -triiodothyronine -##oagulation -##icus -coordinating -hbo -nut -##itidine -underweight -baclofen -chances -preparedness -##ealth -phal -telomeric -concussion -##junctiv -##eno -##atography -gor -juveniles -polish -neurosurgery -deacetylase -accessions -##port -##erves -flocc -cognitively -bronchiolitis -deactivation -exudate -periodicals -multitude -fenes -metoprolol -edent -fixing -##rad -reassess -cd133 -renders -pdc -duplicated -##adate -##odone -251 -liz -##ituric -##hydroxy -##akia -pork -claud -f4 -##apheresis -##inesis -nucleosome -spt -denv -flip -accessing -epididymis -rgo -dce -tj -cbz -##tinib -senile -quiet -##etermined -fulminant -ovip -ultrafast -nonalcoholic -narcotic -dating -##adecyl -266 -nmdar -misle -vibrations -usability -##capes -melt -neurobehavi -startle -tetrodotoxin -save -monocytic -villous -237 -gratings -hemangioma -disparate -perturbed -discourse -partum -specify -rooms -formate -dentist -scopus -##ancerous -nanosheets -sis -allergies -polyamines -##lm -researches -hca -characterizes -transducers -vocabular -emit -predominated -##meter -facs -iud -biomaterial -qsar -relying -aspergillosis -encompasses -##menting -unrel -##oremediation -lithotr -capturing -6m -suite -cort -curricula -##alog -cvs -naoh -dilemma -315 -shocks -sublethal -transist -crass -laid -##graphs -cacl -##opharynx -grows -hydron -laterally -underlies -anesthesiologists -carrageenan -##opically -chlorinated -enantioselective -segregated -arthrodesis -depolarizing -scotland -charco -shunts -##transplant -##1p -vortex -arteriography -thawing -polyelectroly -##inum -conspicuous -jh -sight -rcbf -booster -triacylglycerol -immunoreg -rhodopsin -##orrhizal -toxoplasmosis -lamivudine -##ivir -speculated -##romazine -projecting -argentina -dma -indones -lmwh -##avidin -spaced -pedigree -farming -bubbles -cancellous -convulsions -belt -##edoch -endocytic -cyp1a1 -##ophyt -excurs -denervated -weaned -phr -monitors -adamts -##uer -wakefulness -##rosterone -circums -pediatricians -pdl -myasthenia -lind -spill -wheezing -columnar -glucuronidase -territories -##arrhythm -nanomolar -causality -##oven -colectomy -hemorrhages -ghz -##5b -inbreeding -##ellites -##ectant -suckling -neuroanat -infra -##omponent -absc -vasodilatation -##tins -pav -##omel -milder -myotubes -##alic -##butamol -wg -soldi -torr -aegypti -cscs -affords -##ht -hypercapnia -tamponade -nonadh -supras -vign -##encl -##space -physico -zirconia -pthrp -intentional -kev -pediatrics -##lete -valproate -syll -thawed -##thio -##quat -paa -inflammasome -##lymph -irritable -ther -calving -##edral -bimodal -pneumonitis -##eic -##olon -globe -sms -allodynia -fura -##opeptides -##ogloss -##uters -ipc -inulin -genit -equator -msv -thoracoscopic -##rolateral -246 -rrs -conceived -reentr -##yletic -unprotected -dione -##oty -qaly -##ocs -##sil -stands -salbutamol -##pregnant -peroxynitrite -amazon -nbs -hydroxyethyl -hemophilia -myopic -microvessels -street -initiator -obligate -television -##idosis -counsel -##oide -##axanthin -##eme -overestimated -nord -249 -##ectivities -thyrotropin -recovering -qtls -earthqu -internalizing -##onus -##liptin -bottleneck -trapez -mvc -040 -cld -plasmacyt -2500 -tung -diamin -evoke -nucleolar -fundamentally -repeating -transformants -caribbean -##wl -wastes -##equencing -arthro -cd38 -dystrophic -vg -invariably -caveolin -##itizing -charcoal -unravel -delaying -sock -244 -formats -pyrolysis -emin -psii -bpy -cpm -hek293 -captive -shells -schoolchildren -cardiometabolic -##7a -tars -empowerment -##agenic -extrapolation -itrac -amik -equity -295 -query -eicos -tetrazolium -##qs -##⁻¹ -##coming -##body -canopy -##har -tympanic -insults -diesel -pteryg -vertically -intradermal -appreci -spheroids -ddp -##cephaly -18s -duplications -preoptic -##arbox -247 -kupffer -mercapt -##enclamide -irin -hch -##onsin -cpe -recirc -tata -emboli -protozoan -heterotopic -certainty -##epsia -253 -##anate -pretest -##utres -thermophilic -440 -abstraction -telomeres -vegfr -##onders -adenosyl -intragastric -cycloaddition -spm -##orescence -mercapto -alumina -tentatively -advocacy -gk -pnd -##vhd -stays -temperament -compress -##amili -bullying -unfavour -habituation -##retinal -emr -##ocholine -substitutes -retail -confidential -##iso -paraventricular -scopol -wal -conspecific -completeness -antihist -characterise -catastrophic -excip -deoxyribonucle -ceftazid -glib -orbitals -lipof -##atur -transcutaneous -##che -precautions -kinin -minimization -formic -dyspepsia -##eptor -itraconazole -ramp -bisphosphate -parag -phenols -peru -odontogenic -profit -fluorodeoxy -##worth -tpn -reversibility -hil -extras -infiltrated -##tring -nes -senescent -autogenous -erps -naltrexone -##344 -##fi -capd -radioligand -contours -affairs -altman -##enolol -dehiscence -synovi -loud -rs10 -assumes -fms -amikacin -irinotecan -smcs -cephalosporins -conferring -insula -mismatched -oxidoreductase -concert -##ysteroid -restricting -redundancy -##ionyl -sorbitol -dislocations -##ryl -##lers -cereal -##lingual -bart -##roke -igt -queens -nipple -buffering -scalable -##oneu -permanently -chaotic -ganglioside -##iaxial -oxygenated -compositional -inclusive -hypothyroid -internationally -farnes -thinner -gulf -crystallinity -##hb -placentas -cartrid -wisconsin -##famide -##oprophylaxis -wheelch -recalled -alkylating -activations -electroporation -bival -cob -bras -lrr -convincing -ld50 -antifer -intensively -dpi -##nn -athlete -##obutyl -cystitis -haemorrhagic -hilar -##reach -ceftazidime -splanch -exhaustive -1971 -myelopathy -vitil -##tiform -##atri -massage -##a3 -administrators -cpc -##rey -polyureth -autophagic -pgr -divide -glue -rts -vta -deposit -##ibrils -fork -##ll -##ycholic -defibrillator -orfs -threatened -leukemias -appointment -ira -merist -isc -cape -myr -cytostatic -videos -agen -cohen -northeast -dash -informatics -malaysia -governance -kainate -adnex -bears -dihydroxyvitamin -capping -replicative -uracil -bcva -villi -ats -cyclopent -rsa -proposals -goblet -papilla -##amole -bulls -##oethylene -castrated -illumina -oint -oesophagus -##athin -spouses -trains -pdac -spider -lowers -ownership -##l3 -decont -craniotomy -disproportionately -2p -mph -crowding -##osarcomas -##cedural -pvc -chelation -##itism -invertebrate -##fm -ipt -transferring -pge1 -scfv -olt -proteinases -##ums -hscr -pae -isotype -favors -rantes -268 -decidual -unfamili -immunology -atopy -typed -##econds -tie -lift -glut4 -smr -ille -diphosph -##olamine -##angiect -humid -hyperinsulinemia -biochar -cervic -##factorily -##romal -atenolol -attrition -##aciens -converts -faith -morris -vaccinia -ethoxy -nq -##junction -moves -dispersions -fertilized -periventricular -parat -reca -nonsurgical -257 -borrelia -nonpregnant -##scat -##gener -modestly -customized -scopolamine -##kc -cheap -issued -electrophysiologic -c12 -305 -bicycle -dwarf -isothiocyanate -sphingosine -estimator -discriminatory -choledoch -intergenic -dentures -ankylosing -##uristic -requests -aquap -dyads -##eav -isole -unaw -polyneuropathy -rds -278 -taxol -casts -anticipate -posttransplant -aminoglycoside -arabia -##americ -batches -adopts -##electroph -survivorship -##enedione -interprofessional -opc -taqman -corrective -accord -##aparin -waals -losing -##cast -cautious -##osynthesis -lan -micrometer -##factors -microliters -##rolithiasis -endosomes -knot -miscarriage -kaposi -258 -##olytica -nachr -macrop -arteriolar -anticholinergic -umb -staffing -marketed -sirnas -computations -renewed -mesenchyme -##enne -vagotomy -##ocratic -employee -linol -ethylenedi -speckle -##ifos -apgar -regenerate -rpl -spermidine -retinas -fent -##atechin -dipyridamole -egta -outreach -##onis -exacerbate -correspondingly -chalc -jack -lifting -##planned -nef -incentive -##achus -anthracene -framesh -stripping -##tip -##oxine -273 -410 -##ventive -gastrop -##enched -nonparametric -matured -fault -stm -saccade -##uronal -##asure -piezoelectric -amalgam -##ithm -esd -virion -buried -propagating -##opyranosyl -nomenclature -jas -bcl2 -noncon -proband -breakpoints -prices -miniat -285 -pectoral -microdiss -##eca -45ca -##gesterone -thiobarb -disruptive -macaque -coeliac -oligodendrocyte -cereus -cd31 -salicylate -##retic -intert -mucositis -synthesised -rewards -mma -carin -scant -definitely -colostr -aeds -##oping -aac -collisions -keywords -nlrp3 -kine -antiangiogenic -unfavourable -enucleation -football -##apent -myelodysplastic -##enzym -thickened -hyperrespons -fw -crashes -yersinia -glibenclamide -backscat -mrp -##anesulf -antineoplastic -obstructed -distinguishable -snack -focuss -##osaminidase -signed -vitiligo -htert -##her -288 -fertilizer -ubiquitination -submic -interim -complicating -inlet -acycl -deciding -##analy -##erry -301 -jia -putres -tbs -ancillary -eco -oxidants -##odystrophy -rca -subreg -goiter -bodily -forage -crystallized -delib -##osahex -##ulitis -ontogeny -chips -intracytoplasmic -dad -unint -extracranial -overgrowth -##fractionated -##arins -htr -##phae -epilep -adenoid -isolating -cole -##otency -unconscious -roch -##oeae -sprouting -endeav -ghrh -peroxisomal -263 -##rogenital -##09 -##rospor -##entrifug -bulim -nanotechnology -tir -acrosome -dapt -electroencephalogram -splanchnic -asparagine -asphyxia -acd -brs -nanostructured -gravis -##urinol -diphtheria -mania -bouts -holo -silenced -304 -302 -cavitation -##cam -manic -epilepticus -chic -sprint -parkinsonian -sternal -vertebra -glycosaminoglycans -exogenously -microbub -leaks -patella -isotherms -antisocial -reversing -dqb1 -##antine -propagated -##centration -rectangular -continent -comt -ventrolateral -corticotropin -misleading -ultrathin -maladaptive -scrotal -iiib -canis -hairy -phonon -heterochromatin -pbr -267 -##oconazole -≤0 -organisations -alignments -bioaccum -thalidomide -dz -immunosuppressed -salicylic -perchlor -army -streptavidin -awak -quarters -appearances -sesqu -satisfactorily -nonselective -inherently -##ochond -ferromagnetic -researcher -arithm -ql -predetermined -rhizosphere -pallid -degs -##jiang -optoelectronic -vulvar -posth -inquiry -nitrobenz -bde -h9 -qm -evs -arithmetic -dth -azt -##yrinth -biomechanics -anhydrase -tethered -dimethoxy -russia -##tida -officers -malfunction -microvessel -460 -amlodipine -##ofibrate -broilers -##odilution -ota -##tles -##anoid -apache -achr -fcm -polyphenol -spermatog -fingerprinting -##blas -hepc -classically -##rew -endosomal -##±5 -ginsen -coexpression -##lux -photovol -rio -proactive -steat -coculture -socs -reflections -t1dm -philosophy -##onella -##atp -fontan -lymphedema -##cet -paulo -hydroneph -vinc -##nl -bringing -shadow -rapd -##etit -cooled -##tonic -shield -520 -microvilli -nct00 -histochemistry -##vo -##agliptin -phc -##2s -papilloma -coliform -biomimetic -lepr -bism -##epo -esophagectomy -legionella -learners -ovariectomy -subsam -versatility -millis -acous -docosahex -contingent -colostrum -##iflor -anthocyan -##imidine -##ipsy -decisive -##orating -##etan -capita -disassemb -##week -chemopreventive -fpg -hydroxyd -normals -omn -chelator -instar -misoprost -##etium -subclasses -reoxygenation -##imeters -paced -##gamm -purple -distortions -avidin -profess -claudin -euthyroid -aliqu -shbg -synovitis -gonorrhoeae -switches -thyl -paco2 -##itim -glycoside -groin -antitrypsin -##aracter -276 -##rian -roux -thiobarbituric -misoprostol -amoeb -##ourinary -heteros -##oglitazone -ttx -logarithmic -cemented -palmitic -##nr -undern -##umenting -chen -##200 -sri -lc3 -enolase -intratr -menarche -administrated -smith -##lear -depolymer -macul -erythropoiesis -accompany -mineralized -fabp -commensal -cler -cytochalasin -unipolar -agenda -telangiect -binomial -sod1 -coagulopathy -pdr -ced -kw -stick -##ybden -myd88 -eralpha -lord -bystander -sinensis -modifiers -hiber -calm -##ptosis -flavin -provocation -fluorophore -desaturation -receives -mycophen -radix -immunoperoxidase -procollagen -thallium -fmlp -##anoic -sterols -262 -edu -gym -intracardiac -##tera -##grams -mounting -diis -crosslinked -##ymia -radiographically -ophthalmology -##isis -g6pd -##oba -arteritis -pioglitazone -postgraduate -hang -##apa -anopheles -evenly -hpc -urogenital -desulf -dextrose -##ortus -α1 -mtc -autograft -##ilia -peritoneum -hemipar -michigan -phent -arid -nkt -spleens -cooked -palpation -mcao -clathrin -ils -inclination -lecithin -astrocytoma -poliovirus -empiric -##elian -mares -ghana -##opoietin -##264 -amid -phytoplankton -gras -cse -vocabulary -genitourinary -phrenic -##obr -rhb -plotted -xyle -vocs -##jejun -ene -tough -monovalent -inspiration -##litazone -dj -##affinity -rotated -osmolar -fellow -lcs -aphid -enterococci -e4 -wallis -mpfc -nevi -documenting -postal -anterogr -tnt -radiologically -painless -swing -harbored -atypia -agenesis -baculovirus -satur -lvad -sulfamethoxazole -marsh -##riol -ffp -ema -greek -##igmentation -thyroglobulin -##fig -##ropylene -anoxia -pharmacies -cornerst -graphs -##ombic -hemi -femin -bmax -strabismus -naked -##itans -##formans -##obaric -tune -krus -vanadium -##icum -subth -topically -enterotoxin -fissure -acellular -##irm -##ohum -misclass -##agog -hallmarks -χ2 -professions -tendencies -entitled -##ecret -shoulders -##otrophs -##f5 -f0 -pret -##ui -maltose -##grass -transluminal -tails -supern -chorio -untrained -protamine -tolerable -##ofr -pseudoaneurysm -things -koh -nog -ltb4 -##plus -cofactors -neurotoxin -aic -##ermectin -dilutions -macros -glot -##oembryonic -fe2 -##ounds -cystine -fru -##epsy -hin -urate -clonogenic -synergism -sort -zwitter -muc1 -##iment -##othre -a4 -keratoplasty -avascular -necessitating -sip -rifampin -##lycaemic -achievements -pathologist -plethysm -wnv -monocular -reminis -virtue -##ocycline -##front -##tinine -spaw -accent -noncomp -lobular -widths -translocated -synech -southeastern -##orbable -acetylcysteine -zns -ppg -mustard -##triatal -coumarin -nucleosides -scholars -garlic -hyperuric -catalytically -soma -ruminal -citations -stories -lentiviral -##5y -272 -##akis -circumvent -nse -apps -fst -grossly -chlamydial -electrom -impressive -passively -unintended -myri -gangliosides -featuring -sbs -abduc -identifier -##azolin -ribonuclease -vagin -b5 -contracting -lifetimes -##legic -##ybdenum -impulsive -adaptor -##yramidal -symbol -pid -eicosan -ethi -##adiene -hybridoma -recruiting -##rier -##terna -harms -dura -reser -shang -tlrs -tce -intracoronary -matrigel -basket -arcuate -octyl -ethers -mpi -polymerases -czech -transwell -##osupp -protoplasts -##fected -intuss -secretin -##ylon -pyridin -pyre -resectable -##feld -312 -intracellularly -nonresponders -chaperones -##ovaginal -ikappab -yt -impingement -carcinoembryonic -japonica -wt1 -mastoid -taf -##gm -ecp -postsurgical -endocannab -helium -thermoreg -competitively -unidirectional -hts -desmin -##iden -hemagglutination -osteonecrosis -oculomotor -luminescent -io -taxon -infarcted -##umes -multinucle -tanz -erythro -capped -mobilized -whate -abundantly -inserts -390 -whatever -turbulence -myometrial -oligomerization -colliculus -myofibroblasts -##atrial -polysomnography -ild -glycosaminoglycan -ji -rac1 -averages -sarcolem -tell -postc -indo -lasik -unfamiliar -smoker -disrupts -remnants -##adin -bioinformatic -posttranslational -uganda -isi -transporting -postcon -isotopes -putrescine -aminopyr -cia -lia -genic -antrum -##energe -effluents -biotechnological -tween -sublingual -ctni -scaph -intuitive -acyclovir -neuroticism -abused -extraordinary -tier -##e2 -gist -ttr -recombin -harness -chemoradiation -cant -mlr -proves -metallothionein -##uloplasty -##acryl -chimeras -##evalu -aminopeptidase -ikk -cerc -ibm -functionalities -beer -icus -cpd -immobility -fluorodeoxyglucose -kinet -##rolactin -ctgf -anorectal -##cnt -additions -couplings -cloac -tle -neoformans -expands -hapten -hibern -teenagers -nests -endotox -asser -##extraction -brucei -##hibition -oligodeox -sirolimus -endorsed -levofloxacin -dul -intussus -taz -isoc -biogenic -conidia -opson -colors -methylphen -430 -##ett -exponent -hne -##exc -dus -precede -teratoma -##albumin -plt -calories -##urization -tailor -316 -electrophysiology -thromboplas -hypochlor -pce -tramadol -lav -illustrating -seminiferous -rye -summation -microcys -competit -fingerprints -inad -cardiotoxicity -proteobacteria -tuberculin -cili -greece -administrations -splenomegaly -sio -hypoperfusion -perist -galanin -palliation -peroneal -hydroxybutyrate -##androsterone -yang -r0 -reduct -nirs -mop -plank -##ectral -climbing -sensations -goes -##activator -prb -stocks -##ulectomy -tailoring -isoprenaline -##uese -hypertensives -##aterals -unplanned -stasis -hpt -electroencephalography -##uctured -srp -rud -alliance -radionuclides -orphan -nontoxic -bronchoconstr -deserves -heuristic -portuguese -underline -sorbent -compost -mdck -erosive -dum -##uren -endoscopically -fermi -nile -gonads -breakpoint -stau -abp -deciph -racem -insecurity -nighttime -incisional -impregnated -nfat -subsurface -manifests -analytically -aspirates -unavailable -hyperphosph -bred -stoma -261 -precedes -dmba -##roplas -lw -autoregulation -dlp -##vc -noisy -coagulase -##ectum -wetland -undefined -supplying -cd36 -multicomponent -mrc -shorten -cga -flask -##ospora -##unn -unacceptable -wp -##ogs -303 -hypersensitive -296 -varus -federation -##ohydro -spatio -worry -aggravated -chlorophenyl -vasomotor -sporulation -##atent -furan -pm10 -##pir -icv -gabap -cocktail -ond -470 -lca -488 -disagreement -tungst -occupying -synaptosomes -adrenocortic -##anglionic -afb1 -constituting -microinj -##apr -clearing -##orrected -utilised -thylak -neighbouring -nanod -polyploid -glenoid -stat1 -horizon -kal -satisfy -extrapolated -neurofibromatosis -benthic -anthracycline -incompatibility -ppa -ppc -sulfonate -cooperativity -intraoral -##edics -##ryst -equimolar -##overs -aquac -restores -##teat -hemagglutinin -enforcement -##perm -strata -avm -neighbourhood -lepid -##guanosine -oi -videotap -lateralis -ranitidine -controversies -indians -cd68 -dmn -hydride -##ocortin -bullous -spar -expose -flagellar -##idial -antiapoptotic -pufas -dag -npp -##ocereb -fats -##activities -intersection -mage -uncharacter -nonre -ctp -homic -appointments -lak -preformed -##otoler -blends -nscs -adjustable -normoxic -orexin -##ibrillar -##olium -b10 -##iliensis -cme -primordial -tritiated -amitr -##gravity -collaterals -witnessed -##avastatin -retinitis -c9 -races -alfal -youngest -sesquiterp -cataracts -archaeal -##inators -krebs -producer -aggl -##ocarpine -286 -tfp -##ups -gabapentin -ito -sway -ferroc -embryonal -##ibenz -narratives -tdp -##anteric -endogenously -##140 -minocycline -ntr -disconn -269 -##atran -endoscope -difluor -approximated -gallstones -protrusion -polyurethane -exciton -neurites -##olyticus -##rooms -6a -avidity -perforations -truth -remif -##ecretory -isos -periodicity -fastest -overlapped -vero -consultant -mock -hexa -semistr -sterility -clavul -server -277 -inhomogeneous -pons -femor -nsc -alfalfa -yoh -lycopene -creative -aap -xeno -##ophthalmitis -occurrences -steers -##tines -computers -revert -chi2 -guang -amelioration -visuospatial -depot -behcet -recommends -isoleucine -corneum -##ropion -ultracentrifug -##abp -##omalacia -##1c -##iances -##ectasis -dosimetric -cohesion -##imod -technetium -awakening -##amps -hydroxyphenyl -steroidogenesis -launched -fk506 -microem -linolenic -bost -perine -##kins -endang -dimorphism -maximizing -curett -prenatally -fluence -bromocr -semistructured -glute -nonsense -linc -##cha -clom -tourn -ocs -migrant -myelogenous -stabilizes -##asculature -hyperre -##omol -oxa -websites -540 -likert -##123 -neurobehavioral -amended -##uximab -giardia -defec -##etast -styl -##isole -nepal -ipl -##omimetic -chs -ivus -edentulous -cumulus -hyperresponsiveness -violet -##aya -##athion -infested -regressed -thalam -##acyclines -pef -cens -beings -parallels -##itations -ship -raters -posterolateral -infarctions -macrolide -vaginalis -entrapped -hypokal -b27 -##acental -revisions -bronchus -ethiopia -disadvantaged -ants -aqp -pink -##flower -turbidity -5ht -interchange -##kt -metic -##oglycer -fucose -indwelling -intoxic -tavi -292 -participatory -seated -musculature -subch -logarithm -amnesia -glucopyranosyl -oliv -occupations -##ylline -schwannoma -hepcidin -etan -landing -xylem -gat -peo -sgl -vats -molybdenum -unexposed -zeolite -drow -hyperglycemic -##aska -sonication -fenton -transepithelial -sublux -##izine -thromboplastin -##amellar -flocks -##ulsification -weakened -##ynes -taa -installed -clarification -remifentanil -drag -niches -pursued -cuticle -0004 -desicc -villus -spiroch -urology -subspecial -atherogenesis -osteopontin -lymphoblast -##eutical -##abilis -cotinine -pupils -malnourished -esterified -##arative -saved -kruskal -resistive -tensions -intravesical -##oquine -diverticulum -allopurinol -formally -calend -274 -payments -assemblages -scapular -haw -starved -##romedial -chc -dwell -##echoic -spars -conson -##osinus -vacuole -recession -knife -aom -catchment -tav -preimplantation -sales -occupy -foref -refrig -baical -wl -##yps -disposable -##ometrium -hart -conjunctivitis -##vised -digits -asking -deoxynucle -tobr -childbearing -##opathogenesis -jobs -companion -aeration -diterp -cements -##ulans -forceps -carcasses -hym -predictability -##tiana -gonadotrophin -tnp -loneliness -belgium -prs -intensification -iont -appendectomy -androstenedione -circumflex -fluoresc -flt3 -herbicides -sy5y -hypocalc -##rance -##ender -vitality -cotyled -undetected -ait -incubations -##rolactone -quinine -disintegr -mainstream -metach -lapse -harr -isov -longus -unemployment -reevalu -fluoroquinolones -ruminants -hyphae -mononucle -335 -sting -stall -validating -271 -haemophilia -defibrillation -protracted -neutralized -atrazine -guides -docosahexaenoic -watershed -cd5 -curettage -lq -puff -ketoconazole -##adhes -wheelchair -fistulae -##oxys -coronavirus -postpro -tod -northeastern -ubiquitously -##eptors -spas -nanometer -##testosterone -leaflets -##bed -necessitates -vastus -budes -299 -otc -##etector -resistances -fuels -pigeons -mtd -ctls -assembling -##thiaz -cpl -endophthalmitis -dominate -##operitoneum -cgy -##cls -##domains -quebec -tetras -detached -immunolab -print -invade -burned -##inarily -posttest -##ozoite -cholesteat -unpro -suddenly -##amilies -nights -##rogesterone -cheek -penta -diuresis -##low -extractable -invaded -inevitable -adt -agrob -intercon -reb -pharmacodynamics -rewarding -urch -999 -##ulant -basically -budesonide -pervasive -##ultures -cellularity -##ochromic -disintegration -phagocyt -dca -spc -spectro -berber -aggr -gingiv -coincidence -rpm -##benzene -thermodynamics -boston -echocardiogram -hyperemia -##interpre -amounted -erα -accelerometer -8th -rosig -pockets -smi -prrsv -spur -entorh -allied -sirt -retinoids -hpv16 -parsim -hydronephrosis -##olyl -condoms -##urant -##aphy -complicate -##relation -##ubic -ceased -reserves -##oconjugates -ssdna -merg -irid -##hydration -matters -hypertriglyceridemia -gills -oxldl -lesional -thiop -cellulase -hypoxanthine -intussusception -rgs -dissections -thymine -caught -myometrium -##hel -steroidogenic -lns -##ository -arthritic -##y1 -gapdh -pomc -rcs -erosions -rosiglitazone -taut -##nit -embr -promyel -bismuth -sorgh -trifluoromethyl -chondros -vat -congru -pasture -wetting -##elvic -stomatitis -uncondition -282 -aptt -c60 -hydroxych -lignoc -examiner -ambulance -premed -relay -##ointing -4s -##ospheres -mesial -foveal -chromaffin -nevus -sumo -osteoarth -parvum -bilingual -inactivate -coq -underestimation -appreciably -850 -##vales -##iptine -c5a -##epiandrosterone -guil -eoc -unsuitable -phb -##identate -##pora -gonadotrop -sealed -6h -accult -##adecanoyl -yes -decou -relieving -zea -##represented -ointment -legisl -triceps -ffr -pj -##ercept -adma -colistin -shade -landscapes -##iflex -##gae -inex -dissol -alga -grained -benefited -uneven -waking -##romas -##ulosic -conjunctiva -joined -384 -subretinal -hydrostatic -##onite -cytored -suvmax -##erian -depic -generalised -guard -thz -prospect -##imbine -ned -unpaired -##itarian -gsk3 -##ofovir -wort -apnoea -brightness -exemplified -penicillium -feeds -##with -excretory -striated -masseter -perfectly -##omac -mvpa -histogram -bronchopulmonary -intrathoracic -endop -multilocus -hyperglycaemia -hmw -##ounting -masticatory -pooling -apap -hydrogenation -houses -aetiological -detergents -estrone -fluoroscopic -filler -chlorp -##iptan -reclass -kim -spf -nonfatal -mammographic -classifiers -tegmental -tdi -dide -il6 -holter -photovolta -31p -immunisation -experimentation -phacoem -anticipation -propidium -enteritidis -bloc -automation -esterification -coryne -metamorphosis -unreliable -daf -gssg -trep -b19 -indium -phyll -entorhinal -nematic -runx2 -pilocarpine -instituted -afflic -asymmetries -lpo -helicase -restrained -mrd -aversion -gip -flowing -phentolamine -##acylglycer -employers -packages -genuine -thrombectomy -reex -wearable -maneuvers -bromocriptine -##ortin -arteriosus -hydrate -cefota -uncorrected -virologic -3m -##xs -stenotic -##plen -##operable -shs -432 -cytoprotective -##obox -mbs -ate -laccase -##urv -ovulatory -ntp -pco -stools -blm -psychophysical -elaboration -mpm -redes -masc -##arcin -stretched -cbl -345 -qc -##irs -executed -obliteration -lbw -vascularity -##atidic -trisphosphate -colic -purulent -accession -torsional -cgh -pso -impressions -marking -cimt -mbq -etanercept -rafts -oximetry -yohimbine -pristine -xenobiotics -analogy -zymosan -intraspecific -pyelonephritis -hepatobiliary -bisexual -12th -tobramycin -mud -palmar -leiomyoma -excitations -284 -soldiers -##inqu -##afe -nonne -eighth -underestimate -duk -3s -stroop -##bosacral -oviposition -polypropylene -ambiguity -oe -spectrin -tard -hyaline -endoderm -hematology -stacked -repositioning -preferably -##idases -paraquat -antipar -conserving -rns -alloc -##aclopr -reserpine -##bert -##ygb -ipscs -dup -sensitizing -latitude -obligatory -anthocyanins -patell -successively -279 -neuralgia -ergometer -anterograde -orr -boxes -lithotripsy -instruct -##omys -inadvert -pontine -privile -brucellosis -massachus -phacoemulsification -carbons -accepting -##angli -##opulation -##hand -dehydroepiandrosterone -##inae -complexities -catastroph -massachusetts -stereoselective -gallstone -underway -resistin -oac -##iliation -283 -mats -unim -anthocyanin -ewing -##ormone -loadings -##ixed -##zees -centromere -bites -chimpanzees -ctd -333 -cyp2e1 -nanocar -##ensor -mdp -hits -predisposed -rearranged -hamart -##urement -vaccinations -sacrifice -erroneous -centroid -foodborne -##pers -autofluorescence -brca2 -scenes -imprec -slurr -intercostal -##merc -methylphenidate -ambulation -triphosph -ccl2 -saponins -fps -unambiguously -ldlr -##azoles -educate -##k3 -##exia -doubly -cnp -hypoplastic -##145 -interdig -##dam -glucopyranoside -aniline -unreported -phs -satiety -bars -conformers -ensuing -ventromedial -epileptiform -insular -religion -angiograms -sdh -costimulatory -p24 -bisphosphonate -cvc -##gap -bak -quintile -tnbc -calcit -truncation -##alazine -periapical -colocalization -21st -dutp -proteus -passeng -treatable -needing -##endor -convales -intramuscularly -powers -##ostin -##yan -cah -intensely -femoris -peripherally -##ococcosis -neocortical -interfered -ladder -tcd -bicuc -mrl -##bian -regrowth -##robe -suturing -aip -elevate -mineralocortic -resides -##aluable -##entery -captures -##rological -deconv -decrement -refused -##rofacial -pads -chik -succeeded -l12 -meld -##utz -lumbosacral -sorghum -impacting -urethane -shanghai -ally -unple -behaved -aad -sealing -sox2 -consulted -r3 -actors -erb -catalysed -fronto -acromegaly -glabr -preclude -hydroperoxide -brassica -preventative -##uberculosis -meningeal -pixels -consangu -##otidyl -poag -precipitating -arsenite -##eke -##ivalence -ptv -##05 -organophosph -##ocaps -asm -desert -feedst -seaf -bathing -cefotaxime -equilibration -cdp -amphip -microalgae -fee -##obese -##plasm -diagonal -phototherapy -scholar -diverged -625 -checking -industrialized -##xin -fight -agro -294 -naprox -alleviating -bj -confident -myelination -fortified -discordance -towns -neighborhoods -512 -313 -unaware -##male -##mentum -ror -sta -##itize -staurospor -malabsorption -287 -##acetamide -tss -gluconeogenesis -##iffusion -vowel -cd15 -nomogram -bass -spectac -##alva -notification -##essel -courts -unpleasant -##aniline -mam -yoga -agrobacterium -claudication -##ogold -enox -desatur -squir -nonunion -gpcrs -opacity -##irradi -chlorpromazine -##aer -forelimb -judge -gravit -melanocytic -oophor -revis -deregulation -aster -##bon -financing -exams -premolars -syndromic -thrombocytopenic -admixture -fatalities -deformations -##igration -goss -photography -trnas -send -bronchiectasis -metatarsal -tuk -306 -bore -##anx -registers -##anciclovir -ssris -fusiform -elastography -##ohumeral -miu -aquaculture -##ograv -m4 -##oacetate -beta3 -##duoden -amphibian -diminishing -governments -##oglutarate -##opian -beetle -##oi -pneumophila -bicuculline -318 -nontrans -454 -##umatic -405 -phenoxy -297 -unins -##oken -intubated -ivs -##etence -atlant -papillae -##atiles -cdkn -289 -dissociative -metastable -husband -calling -disruptions -dsb -camb -perfring -parth -##ogel -recurring -phytohem -lighting -vinblas -milest -lined -##zn -interr -macroglobulin -##orientation -##oguan -insured -malocclusion -nothing -s4 -ssb -steam -fouling -##ralpha -##tigraphy -collar -oligodendrogl -diap -ration -fulfilling -micronucleus -##ymmetric -ert -circumc -wilms -contraindicated -promises -dithiothre -quenched -investigational -srt -centrifugal -triphenyl -hx -incubating -##actyly -mibg -chirality -cen -rrt -naproxen -##oria -##urans -hw -hyperlipid -hfs -adal -ultracentrifugation -musical -noninvas -##imol -intricate -##cellulose -hypogonadism -perennial -lateralization -polynomial -rads -##lr -##ruc -cardiover -triazole -paris -oxford -cachexia -3c -rear -tibialis -hscrp -dithiothreitol -281 -energetics -basophils -confused -hco -perpet -undertake -snoring -##127 -extractions -diagrams -frogs -gos -ofloxacin -hinge -dibutyryl -volatiles -##icola -hypercal -misdiagnosis -hemangiomas -purchase -disciplinary -##meas -sleeve -acetyls -gq -dissecting -f344 -dppc -postd -manometry -herni -estu -##gy -betaine -309 -micrographs -destabilization -sily -##06 -mpr -raft -bisphenol -immunoregulatory -aftern -unintention -##vt -unve -euthanized -laxity -##optysis -microstructural -repetitions -##105 -intestines -##roni -electroretin -organize -α2 -cas9 -constitutional -wga -cephalic -##tetr -extran -colocalized -immunochemical -##ropath -prolonging -##olecule -dialogue -corpora -cartilaginous -gct -submerged -isogenic -proliferated -carinii -erbb2 -calcitriol -stimulants -##worms -pollination -tribut -##odial -purpos -##crystalline -obvi -inte -expansions -angiogram -euthanasia -##ontally -duch -gloss -##onolactone -tagging -paranasal -reminiscent -keratoconus -antegr -alphab -hydroxylated -##yxin -concentrating -spans -accuracies -pemphigus -wss -raw264 -##ynchronization -##rimers -hete -##onto -nph -323 -adipogenesis -##co3 -arginase -triam -##jury -appliance -##hole -dressings -##ictal -colloids -advisory -avf -microsphere -##uities -##ofrontal -daltons -##urative -560 -underscores -hone -##athesis -rhd -##isations -lil -transglut -photocoagulation -breadth -enantiomeric -ablated -methox -supraventricular -decontamination -##asy -reticulocyte -h5n1 -dps -substantiated -tukey -biochem -enclosed -esp -apheresis -satisfying -vk -infinity -d5 -sulphur -sympos -neointimal -chromatid -thymocyte -tapping -repulsion -muco -invaluable -delphi -absolutely -silicate -355 -diminution -ergonomic -afternoon -smartphone -ropivacaine -tdt -valsalva -pests -##uries -dehp -##ophosphorylation -fullerene -normoxia -lewy -cucumber -aec -refolding -recalcitr -##urd -minorities -premalign -favoured -osseoin -architectural -microangi -brushing -gastrost -mayo -bsi -homeobox -vinblastine -##ophile -irreversibly -##tised -##iasm -neuroradi -gust -accountability -p70 -vp1 -##zolid -spouse -founder -tec -multiply -rape -rosette -gstm1 -provincial -syd -parane -pyogenes -cd86 -mumps -mig -guanidine -lentivirus -perfringens -abusers -435 -ppis -##holm -##ached -eac -retrotransp -unequal -gelation -##odality -arbor -motives -neurofilament -##idov -skf -pluripotency -serca -grasp -##ughters -derivatized -spermatids -##±6 -thaps -dmp -https -fors -hrql -invag -collagenous -##refring -##icon -vocational -reluct -3r -articulation -dimensionality -gallium -solubil -rhinosinus -rotary -##t4 -naphthyl -##spr -##tens -dihydroxyphenyl -assisting -bont -chondrogenic -##util -ceramics -unfractionated -governmental -birefring -307 -meteorological -trx -coarct -tadp -orofacial -##phalan -1969 -anterolateral -immigration -glassy -ers -tunis -precipitates -propylene -324 -removes -##olimus -dtc -multiform -##acetam -avenue -reint -##otetr -tonsillectomy -##atalysts -##iscal -##bm -impairing -carcinogenicity -##organized -preponder -convolution -variceal -workshops -earthquake -sunflower -updating -unity -cued -prematurely -epigastric -effected -pbdes -bags -anticipatory -dyspnoea -deriving -leachate -postures -mfs -chelate -pyloric -crustace -##ibi -antegrade -##carbonyl -diffusive -finished -neutrophilic -diminishes -tanzania -##utamide -incurred -sunitinib -metabotropic -##plasmic -precoci -cd95 -##hss -p0 -##ilol -##udinal -proving -ddd -cdr -simplify -phenotyping -operant -pyreth -gefitinib -labyrinth -ivermectin -oleate -cornerstone -irb -314 -reinst -hoped -iad -staurosporine -##glycine -aptamers -melphalan -t6 -insulator -##oub -byproduc -oviduct -ptr -##nut -sirs -aat -##onitor -pvl -##tilbene -expired -amides -##rocar -##ophilicity -weap -isokinetic -##ifn -nonobese -paradoxically -rhinosinusitis -ted -dls -proteas -##ilson -suicides -galnac -398 -dermatological -landfill -septicemia -citation -relaxations -conceal -thereof -disclose -psg -johnson -nonionic -confronted -1960s -##oserine -##pyruvate -carboxyp -thymoma -immunopos -##bling -328 -##me3 -nylon -##ventional -rpa -##4b -tbp -critic -pancreatectomy -voltages -##akic -ipd -mtr -338 -calendar -nestin -##aci -overuse -mycorrhizal -premise -cacl2 -firmly -##ilation -wrink -quinidine -anthel -17β -veterin -nucleoli -radiosensitivity -cuts -lk -endangered -hybridisation -attribution -##ogas -exhaust -##vular -##fb -dihydrotestosterone -dicty -typhi -perit -##occus -dock -##peri -##opard -pyros -ptt -mict -divor -spme -solit -##neumonia -sponges -jam -lir -mendelian -daughters -snf -transr -capec -guanylate -##ptics -polymerized -decompensated -enters -linezolid -##obal -##pps -hemolymph -cardioverter -interviewing -caa -salping -##oliation -##mscs -4e -bioaccumulation -##ohydrolase -##ecture -cyanobacterial -sternotomy -##imed -download -cannabinoids -##vp -assistant -##l4 -patern -etch -populated -bse -micronuclei -cytokinesis -subchondral -spinach -##omus -lmw -spotted -##bin -clinico -hypothesised -nulliparous -butyric -lobar -##obacillus -paroxetine -oryzae -cleave -neomycin -rutin -phenolics -egypt -##fd -336 -322 -##ophiles -pedestr -##ospores -aroma -##amptic -grief -##3c -##izyg -##kel -subfraction -colombia -pann -ctcs -noninvasively -##oda -hydroxysteroid -macula -##anthus -pht -326 -kyphosis -phytohemagglutinin -calb -sangu -##alact -assistants -photothermal -ffm -##ocally -malle -subsid -capecitabine -trkb -eradicate -victoria -proapoptotic -laminae -bsp -##road -##ometrics -12p -handicap -entom -##4002 -dhe -##adenoma -abstracted -##pervised -equatorial -pvs -fluorescently -worsen -percentiles -##aresis -circumscribed -continental -empy -mussels -thapsig -##yi -##occup -tritic -tcf -##uating -317 -improper -anecd -bursting -apatite -globus -skelet -labial -##atement -squirrel -broc -photop -sponsored -prk -nonlinearity -adalimumab -cetuximab -subtr -neurotensin -metallo -fumarate -quorum -mitogens -barely -conformal -grant -mesothelial -##din -6b -##imbic -bonfer -marc -misinterpre -##atentorial -numb -diox -##urational -intercept -panor -paraplegia -variances -looks -renewable -pgd -##160 -##ohepatitis -##adias -volv -microelectrode -##2p -reven -##ocerc -forehead -superconducting -cd40l -coryneb -apoc -cynomol -counteracted -dealt -widening -##vr -neuropathies -stan -##alp -ida -spiro -thuring -exotic -commenced -spared -##hyper -telec -##ocystein -everolimus -thapsigargin -tendin -abolish -rhabdomyosarcoma -##abric -fivefold -phobia -benzoic -nociception -##opexy -degenerated -lta -ym -indolent -hunger -quantitate -1968 -amitriptyline -inval -nonsyn -psychologists -endosperm -humanized -ascs -##cl3 -meanings -hairs -pws -hypotonic -jn -##omycetes -skewed -orthodon -hamm -##oprofen -osteopenia -aplastic -exome -leukopenia -zidov -neurotrophin -nicotiana -transrectal -pneumocystis -e2f -interferences -##eedback -handled -##ipes -resonator -multipl -lc50 -##fed -coadministration -anhydro -291 -walker -##peak -auricular -inoperable -mainland -oriental -##umer -prein -##ymb -ccp -therapeutical -##enzymatic -localities -hydroxybenz -draws -lysed -ecologically -myriad -##elen -mt1 -decoding -uncharacterized -##ymet -hypere -##otypical -hemin -hydroxyurea -##ucher -berberine -eud -##piper -examiners -peculiarities -microti -##estrus -##odeoxycholic -pepper -##asters -hash -autocor -repell -formalism -sids -denoted -##cyt -premalignant -ims -blade -scavengers -stx -meant -zidovudine -readout -mwcnts -lactobacilli -knew -tetradecanoyl -mated -trimer -dde -ucl -p5 -biomolecular -wilson -opposition -dendrimers -entries -1965 -electronically -##iliac -opaque -polyelectrolyte -##v2 -##ims -##openem -pex -organisational -itch -dipeptide -abutment -varicose -asi -dissect -preserves -intercalated -relaxant -advisable -gastroduoden -serologically -enhancements -342 -thermograv -texts -corroborate -##odular -agonistic -policym -reuse -coincides -provisional -enterpr -stepping -deflection -verruc -normalize -cardiov -entrain -polio -cyp2c19 -##oretinal -##rolases -##mates -##usen -really -harsh -duchenne -cd18 -ebola -gelatinase -##apentaenoic -antitum -relaxing -chx -fallopian -##opropane -advertising -entails -packaged -##c12 -pud -clipping -command -officinal -ft4 -frameshift -tour -sh2 -centuries -##uta -hccs -icf -drilling -##109 -gilts -##arming -topo -camkii -contag -nitrification -electroencephalographic -urge -bonferroni -##udied -##robacter -343 -deoxyuridine -nonoperative -piperacillin -##bands -trivial -##ucker -##oduodenectomy -##trich -##urban -planktonic -daun -##fetil -adipogenic -deferens -worn -explosive -tapered -voric -sialyl -intramural -cog -stereo -subjectively -spend -inertial -introg -photob -mpn -salience -mammal -agp -aspirate -##rozole -passes -hydroxydopamine -periplasmic -breasts -gis -eleph -timolol -##iones -alar -monooxygenase -pus -compaction -behaves -respects -aminoglycosides -xii -##q13 -demarc -specialised -##box -irradiance -premotor -kanamycin -gastrostomy -eminence -dech -provoke -hospitalised -faecium -##fh -aeromonas -tourniqu -##orters -cdk2 -flutter -teleost -cyp2c9 -dpat -microcapsules -fluorinated -luts -remitting -venules -tracks -fog -prebiotic -nonpolar -transplantations -pericardium -hyg -clefts -ptosis -demented -ept -##ariasis -hyperbil -##oplasmin -semis -insensitivity -stut -spoken -3t -protozoa -indonesia -##ophoresis -antinociception -progest -meropenem -returns -voriconazole -converge -louis -feno -cya -rbf -carved -bootstrap -interposition -dioxygenase -myositis -underm -depolarized -assure -pho -outperforms -reportedly -paraneoplastic -pyridoxal -methanolic -biofeedback -##onception -heard -lordosis -coarctation -patchy -sss -adrs -ahead -p63 -harris -papain -##isperse -##atalyst -steatohepatitis -adhesives -mica -enzymic -resembl -hads -cholesteatoma -opacities -##olumbar -lactamases -topographical -illuminated -onwards -pharyng -##ologically -nucleated -impede -daph -congo -neutropenic -ethanolic -425 -athymic -beetles -remind -perikary -protoporphyrin -cytochromes -amygdal -##ropical -pi3 -bronchoconstriction -hyperkal -ciliated -spirituality -plated -dpa -##yrrh -antigenicity -micronutrient -podocytes -mah -antagonize -feeder -puerto -microextraction -directive -532 -ileus -apcs -northwest -radiopharmaceu -elders -brow -perif -collagens -lecture -psm -photore -ntg -muscimol -##electrophoresis -dropout -modulations -oscillators -abbreviated -gramm -hamstring -pericarditis -pyrosequencing -js -caga -lanthanide -##orea -hai -triangle -phox -ecog -oab -##otyl -##yzed -flushing -transglutaminase -##oxime -abandoned -recombinants -antith -radiois -affiliation -##cross -##icularis -monolithic -uplc -##verbal -##170 -sfa -corticospinal -economics -haematopoietic -coincide -mary -intermed -acyltransferase -##amedullary -cards -rxr -alm -robustly -penetrance -reactants -terminate -crick -clc -pursue -##hemisph -cma -##obular -sectioning -npr -hostility -510 -radii -subthreshold -impulses -stabilities -pans -expenses -amplifier -amputations -undoub -##odiagn -##flies -##orelax -hva -minimizes -rk -##indin -superco -arisen -≥5 -shark -sanitation -##etallic -gloves -##galact -486 -jp -containment -interrater -##ovan -ly29 -##othiazide -dishes -hyposp -boc -##alesional -##ries -bitter -##obe -##oplanin -finds -anhydride -propri -discour -pgl -txb2 -macroscopically -congress -seeing -selenite -unsupervised -ias -jones -##odeoxyuridine -gtpases -shall -legitim -riva -attractiv -astrocytomas -chiropr -tachyarrhythm -interictal -expressive -##f6 -fabry -transmissible -sclerotherapy -municipalities -decorated -##held -ordinal -phenanthroline -amf -cmp -rooted -##otrig -univ -interrelated -nonb -cereals -translating -biologists -immunostained -nails -halide -pharynx -exercising -sero -microliter -##type -illusion -##pox -bioluminescence -heterotrophic -psychophys -flun -dcp -1800 -specifications -##ethylamine -succession -optimism -1p -decoc -cdt -silage -±0 -associating -microvasculature -metazo -kinetically -hydroxypropyl -framingham -minip -hemif -##aphic -chromatic -porcel -hmscs -punishment -impar -pallidus -vapour -troubles -emotionally -glaucomatous -bav -amr -laa -vntr -inconsistencies -aura -apposition -biphenyls -evar -##rocytic -inaccess -ppr -eos -nect -dph -n6 -##alignment -nanostructure -excellence -amblyopia -aglyc -##inavir -macronutr -tungsten -##abolism -dimorphic -kaw -euglyc -perfluoro -cyp1a2 -pumped -##aspinal -subluxation -490 -herbivores -6r -##uronate -##anediol -cecum -cran -hsp27 -angeles -calculus -periosteal -aha -uni -stimulations -cd80 -o1 -506 -stride -recess -catechin -##cf -paget -trophoblastic -ruptures -delinqu -##chard -erm -continually -textile -ablative -thuringiensis -vanadate -histograms -discol -laterality -aortas -mineralocorticoid -optimised -mers -fbp -##isd -##mers -seasonality -urologic -wetlands -undoubted -##980 -warts -osteolysis -logmar -lymphoblastoid -undertaking -turbulent -mullerian -##ulinic -enterocytes -tropomyosin -annotations -hypocalcemia -asexual -prun -magna -ido -378 -containers -pupillary -glycero -##ilicity -##loem -reconsider -secretase -dtt -flagella -transmitt -cook -standardize -##jo -##domain -angii -deciduous -cynomolgus -##hap -postis -780 -##eruleus -oncoprotein -procoagul -exchanged -tgfbeta -asynchronous -apn -taiwanese -gca -mycotoxins -sutured -alexith -genitalia -ifa -verteb -##encaps -britain -localizes -##holding -coi -mitigated -peroxisomes -fow -suffers -consultants -haptoglobin -localizing -traf -530 -lysosome -philipp -abb -southwest -pik -wellness -##wash -332 -antiferromagnetic -adiab -arrhythmic -summed -lump -pab -adhering -electrocardiography -clips -imidazol -immunoprecipitated -##wv -##olines -neuritis -sure -organochlor -agrees -##igenes -##enstrual -promyelocytic -##weigh -##emetic -ima -multipotent -pedunc -pugh -##cinated -dichloromethane -herp -ht2 -communicable -glabrata -bci -tsc -tumorigenicity -serosal -lamellae -sacro -##iosity -dmi -ecule -##osteron -##no3 -orienting -hemiparesis -gdf -notch1 -##emal -ross -tentative -zwitterionic -patellofemoral -dst -csd -##phia -lenti -uw -wool -sling -scanners -twisted -proct -shp -phosphoprotein -kcat -portugal -spark -roof -linkers -drained -##yclo -mycophenolate -gracil -hyperprolactin -podocyte -addictive -vep -distally -##uder -radiotr -intraductal -plantarum -informants -407 -##onitoring -vsd -uninsured -dbc -agr -attractiveness -dermatologists -##ontium -thoracolumbar -glomerulos -teamwork -haptic -fio2 -outperformed -##ivocally -unspecific -riton -metalloprote -bioavailable -sdb -ore -050 -elevating -indocyanine -ejaculation -wounding -##b6 -alu -##bachia -rgc -phloem -##104 -ferul -fluorophores -propionic -catfish -narrower -omitted -ensured -penins -diffusely -##rophied -refugees -##brom -precocious -ritonavir -microspor -burkitt -augmenting -iodinated -##maleimide -dormancy -vine -ncam -bvdv -books -mgmt -##yness -ili -happy -perir -dilemmas -bum -tams -permeabilized -enterocolitis -bragg -obstetrical -meconium -gossyp -##oprazole -recruits -symbolic -interferometer -dcc -319 -hamiltonian -brackets -##trauma -jewish -##ela -pneumatic -expiration -pulsatility -disproportionate -sscp -formulae -gynaecological -tep -vertex -pon1 -##silyl -zikv -rivarox -##cu -citalopram -coel -rams -coinfection -pallidum -xenon -6000 -ecori -cryptococcus -aorto -peel -reversion -tactic -hematomas -parvovirus -illegal -##grip -rivaroxaban -ptfe -transmitting -fears -proprioceptive -amenorrhea -aggrecan -##aortic -cmh2o -historic -founded -bombesin -metocl -estimators -##opramide -dorsiflex -trepon -cgp -lx -mapks -##hemispheric -adnexal -329 -1600 -vsv -naturalistic -##pping -cgs -binder -vhl -cleav -japonicum -multistep -flanked -baff -chase -systole -nitride -deformed -alike -empyema -teratogenic -extremes -aquaporin -llc -meters -prescribe -uvr -bcp -##alys -philadel -dispensing -glomerulosclerosis -snare -640 -potentiating -lpc -footprint -alend -histolytica -brands -retrieve -##ublish -formations -microgravity -wolbachia -coin -angiographically -decompensation -pyrazol -photosensitizer -anthelmin -antiemetic -hyperex -373 -pri -ellipso -##ungin -cgi -excimer -intercalation -temp -ssri -essay -immunodeficient -afterload -photophysical -pleura -urchin -mosm -paresis -pseudop -##encephaly -correlational -##terdam -purify -urbanization -dodec -titrated -327 -hepatocarcin -nfk -ingrowth -tlr9 -baboons -karyotypes -lifestyles -carvedilol -osteochondral -disulph -smad3 -imbalances -##jugated -constell -##tage -invasively -banks -postintervention -committees -ethidium -mpv -meningitidis -cdc42 -cephalosporin -##oparas -ree -mpc -neurologists -wmd -wines -##othal -##ophages -lipo -manifesting -handedness -stair -secondarily -potentiates -##ilane -##unate -psychoactive -scarcity -metoclopramide -##tris -eigenv -gingivitis -unloading -adduction -pfos -symposium -prokaryotes -involution -scab -jc -##q21 -2alpha -larva -mismatches -##trast -glycemia -##hemoglobin -fellowship -tenth -glyphos -philadelphia -##ublished -rgcs -isthmus -scintigraphic -osmolarity -theor -inserting -contraindication -bronchodilator -wasp -indica -southwestern -ganciclovir -niss -jumping -kern -sebaceous -premolar -anaesthe -##ucent -glyceraldehyde -ail -dabig -saccadic -mtbi -##elly -isl -720 -sulfo -p19 -##ansetron -icm -glyphosate -gallate -overestimation -gerb -plethora -##core -rigorously -resemblance -dabigatran -concise -whr -cardiologists -ionomer -630 -withdrawing -intelligent -##kyo -unequivocally -installation -tackle -radiative -lettuce -perforin -1g -##entious -epoch -toothbr -intrapartum -dans -plasmal -scatchard -burkholder -331 -##dt -regularity -plasmapheresis -omics -unbalanced -frontotemporal -321 -blu -tubing -##tier -refusal -multicentric -strontium -##ubular -omission -amplicon -s9 -crist -physiotherap -parasitism -recalcitrant -##agin -astr -pravastatin -coag -##ozin -ribozyme -millil -fep -myelosupp -ly294002 -asl -putida -##axine -spironolactone -##aeus -lactide -##onvulsive -vasculopathy -##furan -##evolution -resumption -polypharm -brev -updates -aqp4 -##±7 -hemopoietic -definitively -extrapyramidal -ducks -intimately -geniculate -lipogenesis -decomposed -##iser -declared -stagn -dism -proportionally -hoech -cannulated -oophorectomy -##aration -2k -uteri -procoagulant -underpin -transforms -cfr -deoxyribonucleic -##edrine -intraf -jm -ctnt -marmos -sls -acridine -postr -causation -fz -mtp -disasters -tmt -pericytes -##acylglycerols -surpass -##ertz -so4 -##ynchus -spinning -myoclonus -relaxin -bioequ -hans -##ophenone -##fract -retroviruses -avulsion -collapsed -dorsi -alendronate -##months -cars -hypoal -midwif -##olae -morbidly -disappointing -dermatologic -disassembly -enumeration -hig -idus -ondansetron -nls -##ycholate -glen -successes -valproic -researched -pharmacogenetic -511 -alkanes -director -cnvs -electrospun -multiforme -pcdd -clerks -monophasic -milling -##a4 -stew -##roblast -hypovol -pastoris -supercritical -electrophilic -ehrlich -litters -sba -hut -contingency -anodic -euros -stec -##bridge -lyophilized -##laf -hexokinase -pits -unpublished -##wr -##iasmatic -gathering -##stand -monophyletic -join -ectoderm -##terp -flaw -finishing -##rosthesis -##nh -mofs -##aud -psma -##wire -tokyo -acylation -351 -triamcin -dcr -##emoglobin -hypopnea -hyperventilation -##aglu -##idated -##waters -paraf -334 -##otrich -escs -hrct -multistage -bug -israeli -reforms -hoechst -5alpha -337 -melit -341 -ebna -spindles -##opolymers -##otyrosine -quinoline -austria -nitrox -hfp -phl -ninth -dcd -al2o3 -##icates -plr -prominently -amphibians -##chid -##obiology -pyrrole -##azolamide -immunogold -boiling -microsatellites -subfamilies -oak -##omyositis -augments -flattened -azido -narcol -c10 -jac -transfectants -##imited -photos -sensitis -ceus -regained -##oxifene -##efaciens -biologics -##ething -explosion -informing -##ocampal -noncompl -tdf -hiaa -igan -arachnoid -deregulated -nanocl -glycolipid -adeno -##afs -aus -##anoate -multinucleated -##oraph -approximations -ameliorating -something -motivations -hydrolysate -##tase -endobronchial -extents -pdf -acetylglucosamine -boar -l6 -txa2 -##perfusion -##ogranin -vesico -ppe -posttranscription -hypospadias -iκb -mitigating -diving -outweigh -##isic -carniv -##hipp -inat -penetrated -ests -perinuclear -nymphs -telangiectasia -##ferior -porcelain -travers -midwifery -unimp -##atemia -fauna -glut1 -cholangiopancre -##olo -amper -phosphoenol -##ayered -repulsive -award -chimerism -porphyrins -exudates -##parous -##iprazole -factory -##illar -amplicons -##nitros -meps -##ordinate -spermatocytes -dct -janus -recurs -flexural -phylogen -##roplasties -##ador -jer -dyslipid -intrarenal -buildings -ureteric -lend -##oplication -plexiform -1100 -multisp -etched -biopsied -headspace -sunsc -healthier -##roscope -##onous -##qc -##olac -paraph -cellulitis -deliberate -fluoroquinolone -storing -##othio -malarial -##osylcer -##7t -varicocele -maternally -##emes -interpolation -chemoprevention -bx -intergroup -ivm -galpha -bra -cdk4 -rhabdomyolysis -pedigrees -violations -transmittance -pgh -hydrops -foliar -mids -##uy -syrian -##tick -triamcinolone -quinolone -serp -wett -dio -lactams -citrulline -acculturation -arena -aspirated -chicago -660 -deoxyglucose -haematoma -reinn -mdi -simplest -412 -prf -scand -toronto -armed -exfoli -undoubtedly -##ascin -dinitroph -arthropod -##yelinated -dichotomous -endotoxemia -propanol -archival -nachrs -handgrip -contusion -ucb -vlbw -nigros -autoreactive -numeric -repairing -o6 -pmt -flt -circr -radiolabelled -pmp -complements -eot -##omatoid -noble -339 -##dlers -damping -mall -gingiva -##owa -anaphylactic -incorrectly -enteritis -neurosurge -phylum -pcps -##olinic -##imoto -immunodom -amniocentesis -caval -unintentional -visualizing -offices -microsp -panoramic -impurity -lda -p7 -thyrotoxic -gibbs -hnp -overr -≥50 -proposition -##othoracic -valent -funds -boards -##artite -pleasant -monozyg -densitometry -dme -##rotin -coerc -37°c -mcv -##almit -diastole -sweating -crus -irritability -##iced -immunol -##ogly -remissions -##tening -digitor -##itica -intratracheal -51cr -distractor -tdr -##omicroscopy -beside -5mg -overexpress -fishing -situational -mog -distinguishes -appendage -multiplexed -srebp -accelerator -##enesulf -dependencies -fragilis -monoclinic -confounded -dang -neurologically -invariance -stakeholder -mosaicism -tartrate -##metry -elapsed -##eptidyl -symbiosis -##hz -prolyl -laminectomy -consulting -crm -mgo -nucleosomes -autoc -##ethoxy -7a -adiabatic -vre -odors -succinyl -hyperhom -kilobase -##obenzyl -monomethyl -xylanase -##ibrin -removable -biting -overlaps -addicts -turp -multich -##aco -suppressors -reappear -deceleration -catalyzing -cauda -##5ac -dips -nj -##ager -appliances -scalar -desaturase -artemisinin -vec -##uloplasmin -perforator -kindling -364 -orchid -tetrahedral -enoxaparin -trac -corynebacterium -##attern -photovoltaic -electricity -diffusing -p15 -abca1 -oct4 -auc0 -ero -##qx -##ithi -19th -atpases -desiccation -##idinyl -mre -respondent -rehydration -conclusively -medulloblastoma -advocates -pme -biophys -infiltrative -stereois -settlement -dr4 -barth -gpc -mentors -spd -phenanthrene -##ablation -##craft -cryotherapy -≥10 -decarbox -scrapie -##trium -hyperuricemia -ata -mp2 -##empfer -##egm -orthologous -facultative -bom -precancerous -##cytidine -diamine -pao -repeatable -chimera -communicative -mnsod -preadip -milligr -figo -igg2a -n0 -bifidobacterium -evidently -##hf -dendrimer -leprae -digitorum -menten -saponin -mould -spawning -dpc -pentyl -##enk -chur -ensembles -neuroleptics -##osfamide -absor -##v6 -octahedral -##answ -pulmon -356 -opacification -##adders -##aglobulin -tourniquet -enema -ferred -forget -nonverbal -iva -352 -meticulous -abscisic -burkholderia -gii -influent -##pots -botan -348 -dihydrox -micrometast -digitized -rs22 -lips -pfo -igd -visitors -cd11c -lights -emf -##engine -ethm -chlorpyr -cisterna -##opyranoside -autoradiographic -perchlorate -synonym -##ercise -stereochemistry -##wall -dyslexia -excursion -tesla -autophosphorylation -##ipin -upar -resurf -sterilized -granulomatosis -µmol -eicosapentaenoic -##castle -##ecretion -structuring -##phys -bulbar -ketoac -sydney -mnc -extramedullary -rhu -tropism -hypophosph -clarifying -judgement -jord -puzz -diplopia -deoxynucleotidyl -lympho -##alogy -##6c -clothing -tph +version https://git-lfs.github.com/spec/v1 +oid sha256:7b36651908a88bc38bda41b728b2a598191e0d3b553cbacf7b1e5f026d5b5b9f +size 225062 diff --git a/requirements-py310.txt b/requirements-py310.txt index 6df98a47edbf0bc60dddcb002e387042a0bc4b23..16868c5c1abefa861d5d9d9b17a6a0542838cb27 100644 --- a/requirements-py310.txt +++ b/requirements-py310.txt @@ -1,7 +1,7 @@ -tensorflow==2.8 -transformers==4.37.2 -stanza==1.4.0 -spacy==3.2.4 -bioc==2.0.post4 -spacy==3.2.4 +tensorflow==2.8 +transformers==4.37.2 +stanza==1.4.0 +spacy==3.2.4 +bioc==2.0.post4 +spacy==3.2.4 protobuf==3.20.1 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 8fc3b5bec35c47d0f928bded38caeb0c5a07feef..0818e1479fa6609fd63a87c4f80163458032796c 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,76 +1,76 @@ -absl-py -astunparse -attrs -bioc -blis -cachetools -catalogue -certifi -charset-normalizer -click -cymem -emoji -filelock -gast -google-auth -google-auth-oauthlib -google-pasta -grpcio -h5py -huggingface-hub -idna -importlib-metadata -intervaltree -Jinja2 -joblib -jsonlines -Keras-Preprocessing -langcodes -lxml -Markdown -MarkupSafe -murmurhash -numpy -oauthlib -opt-einsum -packaging -pathy -preshed -protobuf -pyasn1 -pyasn1-modules -pydantic -pyparsing -PyYAML -regex -requests -requests-oauthlib -rsa -sacremoses -scipy -six -smart-open -sortedcontainers -spacy -spacy-legacy -spacy-loggers -srsly -stanza -tensorboard -tensorboard-data-server -tensorboard-plugin-wit -tensorflow -tensorflow-estimator -termcolor -thinc -tokenizers -torch -tqdm -transformers -typer -typing_extensions -urllib3 -wasabi -Werkzeug -wrapt -zipp +absl-py +astunparse +attrs +bioc +blis +cachetools +catalogue +certifi +charset-normalizer +click +cymem +emoji +filelock +gast +google-auth +google-auth-oauthlib +google-pasta +grpcio +h5py +huggingface-hub +idna +importlib-metadata +intervaltree +Jinja2 +joblib +jsonlines +Keras-Preprocessing +langcodes +lxml +Markdown +MarkupSafe +murmurhash +numpy +oauthlib +opt-einsum +packaging +pathy +preshed +protobuf +pyasn1 +pyasn1-modules +pydantic +pyparsing +PyYAML +regex +requests +requests-oauthlib +rsa +sacremoses +scipy +six +smart-open +sortedcontainers +spacy +spacy-legacy +spacy-loggers +srsly +stanza +tensorboard +tensorboard-data-server +tensorboard-plugin-wit +tensorflow +tensorflow-estimator +termcolor +thinc +tokenizers +torch +tqdm +transformers +typer +typing_extensions +urllib3 +wasabi +Werkzeug +wrapt +zipp diff --git a/run_batches.py b/run_batches.py index e9ba76e11b7a7463cb13e0a10c8c57fdfc18b9d1..063af415a4eb555288c9f4576a8913d59259a117 100644 --- a/run_batches.py +++ b/run_batches.py @@ -1,12 +1,19 @@ import argparse import logging +import os import shutil import subprocess -import time -from datetime import timedelta from pathlib import Path from tempfile import TemporaryDirectory +from tqdm.contrib.concurrent import process_map + + +def batch(iterable, n=1): + l = len(iterable) + for ndx in range(0, l, n): + yield iterable[ndx : min(ndx + n, l)] + def main(): logging.basicConfig(level=logging.INFO) @@ -14,7 +21,8 @@ def main(): parser.add_argument("--mode", type=str, default="gnorm2", help="mode to run in (gnorm2, gnormplus)") parser.add_argument("input_dir", type=str, help="directory containing files to process") parser.add_argument("output_dir", type=str, help="directory to write processed files to") - parser.add_argument("--batch_size", type=int, default=64) + parser.add_argument("--batch_size", type=int, default=8) + parser.add_argument("--max_workers", type=int, default=os.cpu_count() - 4) args = parser.parse_args() input_dir = Path(args.input_dir) @@ -32,51 +40,57 @@ def main(): logging.info(f"Processing {len(input_files)} files") - - while input_files: - start = time.time() - logging.info(f"{len(input_files)} remaining files") - input_files_batch = list(input_files)[: args.batch_size] - - with TemporaryDirectory() as temp_dir_SR, TemporaryDirectory() as temp_dir_GNR, TemporaryDirectory() as temp_dir_SA, TemporaryDirectory() as input_temp_dir, TemporaryDirectory() as output_temp_dir: - input_temp_dir = Path(input_temp_dir) - output_temp_dir = Path(output_temp_dir) - for file in input_files_batch: - shutil.copy(input_dir / file, input_temp_dir) - - if args.mode == "gnorm2": - command_SR = f"java -Xmx60G -Xms30G -jar GNormPlus.jar {str(input_temp_dir)} {str(temp_dir_SR)} setup.SR.txt" - command_GNR_SA = f"python GeneNER_SpeAss_run.py -i {str(temp_dir_SR)} -r {str(temp_dir_GNR)} -a {str(temp_dir_SA)} -n gnorm_trained_models/geneNER/GeneNER-Bioformer.h5 -s gnorm_trained_models/SpeAss/SpeAss-Bioformer.h5" - command_GN = f"java -Xmx60G -Xms30G -jar GNormPlus.jar {str(temp_dir_SA)} {str(output_temp_dir)} setup.GN.txt" - commands = [command_SR, command_GNR_SA, command_GN] - elif args.mode == "gnormplus": - commands = [f"java -Xmx60G -Xms30G -jar GNormPlus.jar {str(input_temp_dir)} {str(output_temp_dir)} setup.txt"] - else: - raise ValueError(f"Invalid mode: {args.mode}") - - for command in commands: - try: - logging.info(command) - subprocess.run([command], check=True, shell=True) - except subprocess.CalledProcessError as e: - logging.exception(f"Error running command: {command}") - raise e - - output_paths = output_temp_dir.rglob("*") - output_files = set(file.name for file in output_paths) - for output_path, output_file in zip(output_paths, output_files): - shutil.copy(output_path, output_dir) - input_files.remove(output_file) - end = time.time() - logging.info(f"Processed {len(output_files)} files in {timedelta(seconds=end - start)}") - - if not len(output_files): - raise Exception("No files were output") - - if output_files: - logging.info( - f"Estimated time remaining: {timedelta(seconds=(end - start) * len(input_files) / output_files)}" - ) + input_files = sorted(input_files, key=lambda file: (input_dir / file).stat().st_size) + + input_files_batches = list(batch(list(input_files), args.batch_size)) + process_map( + run_batch, + input_files_batches, + [input_dir] * len(input_files_batches), + [output_dir] * len(input_files_batches), + [args.mode] * len(input_files_batches), + max_workers=args.max_workers, + chunksize=1, + ) + + +def run_batch(input_files_batch, input_dir, output_dir, mode): + with TemporaryDirectory() as temp_dir_SR, TemporaryDirectory() as temp_dir_GNR, TemporaryDirectory() as temp_dir_SA, TemporaryDirectory() as input_temp_dir, TemporaryDirectory() as output_temp_dir: + input_temp_dir = Path(input_temp_dir) + output_temp_dir = Path(output_temp_dir) + for file in input_files_batch: + logging.info(f"cp {input_dir / file} {input_temp_dir}") + shutil.copy(input_dir / file, input_temp_dir) + + if mode == "gnorm2": + command_SR = ( + f"java -Xmx32G -Xms16G -jar GNormPlus.jar {str(input_temp_dir)} {str(temp_dir_SR)} setup.SR.txt" + ) + command_GNR_SA = f"python GeneNER_SpeAss_run.py -i {str(temp_dir_SR)} -r {str(temp_dir_GNR)} -a {str(temp_dir_SA)} -n gnorm_trained_models/geneNER/GeneNER-Bioformer.h5 -s gnorm_trained_models/SpeAss/SpeAss-Bioformer.h5" + command_GN = ( + f"java -Xmx32G -Xms16G -jar GNormPlus.jar {str(temp_dir_SA)} {str(output_temp_dir)} setup.GN.txt" + ) + commands = [command_SR, command_GNR_SA, command_GN] + elif mode == "gnormplus": + commands = [ + f"java -Xmx32G -Xms16G -jar GNormPlus.jar {str(input_temp_dir)} {str(output_temp_dir)} setup.txt" + ] + else: + raise ValueError(f"Invalid mode: {mode}") + + for command in commands: + try: + logging.info(command) + subprocess.run([command], check=True, shell=True) + except subprocess.CalledProcessError as e: + logging.exception(f"Error running command: {command}") + raise e + + output_paths = list(output_temp_dir.rglob("*")) + for output_path in output_paths: + logging.info(f"cp {output_path} {output_dir}") + shutil.copy(output_path, output_dir) + output_file = output_path.name if __name__ == "__main__": diff --git a/src_Java/GNormPluslib/BioCDoc.java b/src_Java/GNormPluslib/BioCDoc.java index bd7c1ffdf20dbd7e999f1511d0a6f546a749bc13..beeac4520d7177f006bc2e68fd9e05ca0df9735e 100644 --- a/src_Java/GNormPluslib/BioCDoc.java +++ b/src_Java/GNormPluslib/BioCDoc.java @@ -1,1344 +1,1344 @@ -/** - * Project: GNormPlus - * Function: Data storage in BioC format - */ - -package GNormPluslib; - -import bioc.BioCAnnotation; -import bioc.BioCCollection; -import bioc.BioCDocument; -import bioc.BioCLocation; -import bioc.BioCPassage; - -import bioc.io.BioCDocumentWriter; -import bioc.io.BioCFactory; -import bioc.io.woodstox.ConnectorWoodstox; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.io.UnsupportedEncodingException; -import java.time.LocalDate; -import java.time.ZoneId; - -import javax.xml.stream.XMLStreamException; - -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - -public class BioCDoc -{ - /* - * Contexts in BioC file - */ - public ArrayList PMIDs=new ArrayList(); // Type: PMIDs - public ArrayList> PassageNames = new ArrayList(); // PassageName - public ArrayList> PassageOffsets = new ArrayList(); // PassageOffset - public ArrayList> PassageContexts = new ArrayList(); // PassageContext - public ArrayList>> Annotations = new ArrayList(); // Annotation - GNormPlus - - public String BioCFormatCheck(String InputFile) throws IOException - { - - ConnectorWoodstox connector = new ConnectorWoodstox(); - BioCCollection collection = new BioCCollection(); - try - { - collection = connector.startRead(new InputStreamReader(new FileInputStream(InputFile), "UTF-8")); - } - catch (UnsupportedEncodingException | FileNotFoundException | XMLStreamException e) - { - BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(InputFile), "UTF-8")); - String line=""; - String status=""; - String Pmid = ""; - boolean tiabs=false; - Pattern patt = Pattern.compile("^([^\\|\\t]+)\\|([^\\|\\t]+)\\|(.*)$"); - while ((line = br.readLine()) != null) - { - Matcher mat = patt.matcher(line); - if(mat.find()) //Title|Abstract - { - if(Pmid.equals("")) - { - Pmid = mat.group(1); - } - else if(!Pmid.equals(mat.group(1))) - { - return "[Error]: "+InputFile+" - A blank is needed between "+Pmid+" and "+mat.group(1)+"."; - } - status = "tiabs"; - tiabs = true; - } - else if (line.contains("\t")) //Annotation - { - } - else if(line.length()==0) //Processing - { - if(status.equals("")) - { - if(Pmid.equals("")) - { - return "[Error]: "+InputFile+" - It's neither BioC nor PubTator format. PMID is empty."; - } - else - { - return "[Error]: "+InputFile+" - A redundant blank is after "+Pmid+"."; - } - } - Pmid=""; - status=""; - } - } - br.close(); - if(tiabs == false) - { - return "[Error]: "+InputFile+" - It's neither BioC nor PubTator format."; - } - if(status.equals("")) - { - return "PubTator"; - } - else - { - return "[Error]: "+InputFile+" - The last column missed a blank."; - } - } - return "BioC"; - } - public void PubTator2BioC(String input,String output) throws IOException, XMLStreamException // Input - { - /* - * PubTator2BioC - */ - String parser = BioCFactory.WOODSTOX; - BioCFactory factory = BioCFactory.newFactory(parser); - BioCDocumentWriter BioCOutputFormat = factory.createBioCDocumentWriter(new OutputStreamWriter(new FileOutputStream(output), "UTF-8")); - BioCCollection biocCollection = new BioCCollection(); - - //time - ZoneId zonedId = ZoneId.of( "America/Montreal" ); - LocalDate today = LocalDate.now( zonedId ); - biocCollection.setDate(today.toString()); - - biocCollection.setKey("BioC.key");//key - biocCollection.setSource("GNormPlus");//source - - BioCOutputFormat.writeCollectionInfo(biocCollection); - BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(input), "UTF-8")); - ArrayList ParagraphType=new ArrayList(); // Type: Title|Abstract - ArrayList ParagraphContent = new ArrayList(); // Text - ArrayList annotations = new ArrayList(); // Annotation - String line; - String Pmid=""; - while ((line = inputfile.readLine()) != null) - { - if(line.contains("|") && !line.contains("\t")) //Title|Abstract - { - String str[]=line.split("\\|",-1); - Pmid=str[0]; - if(str[1].equals("t")) - { - str[1]="title"; - } - if(str[1].equals("a")) - { - str[1]="abstract"; - } - ParagraphType.add(str[1]); - if(str.length==3) - { - String txt = str[2]; - txt = txt.replaceAll("ω","w"); - txt = txt.replaceAll("μ","u"); - txt = txt.replaceAll("κ","k"); - txt = txt.replaceAll("α","a"); - txt = txt.replaceAll("γ","g"); - txt = txt.replaceAll("ɣ","g"); - txt = txt.replaceAll("β","b"); - txt = txt.replaceAll("×","x"); - txt = txt.replaceAll("‑","-"); - txt = txt.replaceAll("¹","1"); - txt = txt.replaceAll("²","2"); - txt = txt.replaceAll("°","o"); - txt = txt.replaceAll("ö","o"); - txt = txt.replaceAll("é","e"); - txt = txt.replaceAll("à","a"); - txt = txt.replaceAll("Á","A"); - txt = txt.replaceAll("ε","e"); - txt = txt.replaceAll("θ","O"); - txt = txt.replaceAll("•","."); - txt = txt.replaceAll("µ","u"); - txt = txt.replaceAll("λ","r"); - txt = txt.replaceAll("⁺","+"); - txt = txt.replaceAll("ν","v"); - txt = txt.replaceAll("ï","i"); - txt = txt.replaceAll("ã","a"); - txt = txt.replaceAll("≡","="); - txt = txt.replaceAll("ó","o"); - txt = txt.replaceAll("³","3"); - txt = txt.replaceAll("〖","["); - txt = txt.replaceAll("〗","]"); - txt = txt.replaceAll("Å","A"); - txt = txt.replaceAll("ρ","p"); - txt = txt.replaceAll("ü","u"); - txt = txt.replaceAll("ɛ","e"); - txt = txt.replaceAll("č","c"); - txt = txt.replaceAll("š","s"); - txt = txt.replaceAll("ß","b"); - txt = txt.replaceAll("═","="); - txt = txt.replaceAll("£","L"); - txt = txt.replaceAll("Ł","L"); - txt = txt.replaceAll("ƒ","f"); - txt = txt.replaceAll("ä","a"); - txt = txt.replaceAll("–","-"); - txt = txt.replaceAll("⁻","-"); - txt = txt.replaceAll("〈","<"); - txt = txt.replaceAll("〉",">"); - txt = txt.replaceAll("χ","X"); - txt = txt.replaceAll("Đ","D"); - txt = txt.replaceAll("‰","%"); - txt = txt.replaceAll("·","."); - txt = txt.replaceAll("→",">"); - txt = txt.replaceAll("←","<"); - txt = txt.replaceAll("ζ","z"); - txt = txt.replaceAll("π","p"); - txt = txt.replaceAll("τ","t"); - txt = txt.replaceAll("ξ","X"); - txt = txt.replaceAll("η","h"); - txt = txt.replaceAll("ø","0"); - txt = txt.replaceAll("Δ","D"); - txt = txt.replaceAll("∆","D"); - txt = txt.replaceAll("∑","S"); - txt = txt.replaceAll("Ω","O"); - txt = txt.replaceAll("δ","d"); - txt = txt.replaceAll("σ","s"); - txt = txt.replaceAll("Φ","F"); - txt = txt.replaceAll("[^\\~\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\_\\+\\{\\}\\|\\:\"\\<\\>\\?\\`\\-\\=\\[\\]\\;\\'\\,\\.\\/\\r\\n0-9a-zA-Z ]"," "); - ParagraphContent.add(txt); - } - else - { - ParagraphContent.add("- No text -"); - } - } - else if (line.contains("\t")) //Annotation - { - String anno[]=line.split("\t"); - if(anno.length==6) - { - annotations.add(anno[1]+"\t"+anno[2]+"\t"+anno[3]+"\t"+anno[4]+"\t"+anno[5]); - } - else if(anno.length==5) - { - annotations.add(anno[1]+"\t"+anno[2]+"\t"+anno[3]+"\t"+anno[4]); - } - } - else if(line.length()==0) //Processing - { - BioCDocument biocDocument = new BioCDocument(); - biocDocument.setID(Pmid); - int startoffset=0; - for(int i=0;i Infons = new HashMap(); - Infons.put("type", ParagraphType.get(i)); - biocPassage.setInfons(Infons); - biocPassage.setText(ParagraphContent.get(i)); - biocPassage.setOffset(startoffset); - startoffset=startoffset+ParagraphContent.get(i).length()+1; - for(int j=0;j=startoffset-ParagraphContent.get(i).length()-1) - { - BioCAnnotation biocAnnotation = new BioCAnnotation(); - Map AnnoInfons = new HashMap(); - if(anno.length==5) - { - AnnoInfons.put("Identifier", anno[4]); - } - AnnoInfons.put("type", anno[3]); - biocAnnotation.setInfons(AnnoInfons); - BioCLocation location = new BioCLocation(); - location.setOffset(Integer.parseInt(anno[0])); - location.setLength(Integer.parseInt(anno[1])-Integer.parseInt(anno[0])); - biocAnnotation.setLocation(location); - biocAnnotation.setText(anno[2]); - biocPassage.addAnnotation(biocAnnotation); - } - } - biocDocument.addPassage(biocPassage); - } - biocCollection.addDocument(biocDocument); - ParagraphType.clear(); - ParagraphContent.clear(); - annotations.clear(); - BioCOutputFormat.writeDocument(biocDocument); - } - } - BioCOutputFormat.close(); - inputfile.close(); - } - public void BioC2PubTator(String input,String output) throws IOException, XMLStreamException //Output - { - /* - * BioC2PubTator - */ - HashMap pmidlist = new HashMap(); // check if appear duplicate pmids - boolean duplicate = false; - BufferedWriter PubTatorOutputFormat = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output), "UTF-8")); - ConnectorWoodstox connector = new ConnectorWoodstox(); - BioCCollection collection = new BioCCollection(); - collection = connector.startRead(new InputStreamReader(new FileInputStream(input), "UTF-8")); - while (connector.hasNext()) - { - BioCDocument document = connector.next(); - String PMID = document.getID(); - if(pmidlist.containsKey(PMID)){System.out.println("\nError: duplicate pmid-"+PMID);duplicate = true;} - else{pmidlist.put(PMID,"");} - String Anno=""; - for (BioCPassage passage : document.getPassages()) - { - if(passage.getInfon("type").equals("title")) - { - PubTatorOutputFormat.write(PMID+"|t|"+passage.getText()+"\n"); - } - else if(passage.getInfon("type").equals("abstract")) - { - PubTatorOutputFormat.write(PMID+"|a|"+passage.getText()+"\n"); - } - else - { - PubTatorOutputFormat.write(PMID+"|"+passage.getInfon("type")+"|"+passage.getText()+"\n"); - } - - for (BioCAnnotation annotation : passage.getAnnotations()) - { - String Annotype = annotation.getInfon("type"); - String Annoid=""; - String Proteinid=""; - if(Annotype.matches("(Gene|FamilyName|DomainMotif)")) - { - if(annotation.getInfons().containsKey("NCBI Gene")) - { - Annoid = annotation.getInfon("NCBI Gene"); - String Annoidlist[]=Annoid.split(";"); - Annoid=""; - for(int x=0;x ParagraphContent = new HashMap(); // [PMID,0] -> title - HashMap annotations = new HashMap(); // PMID ->Annotation - String line; - String Pmid=""; - int count_paragraph=0; - while ((line = inputfile.readLine()) != null) - { - if(line.contains("|") && !line.contains("\t")) //Title|Abstract - { - String str[]=line.split("\\|",-1); - Pmid=str[0]; - ParagraphContent.put(Pmid+"\t"+str[1],str[2]); - count_paragraph++; - } - else if (line.contains("\t")) //Annotation - { - annotations.put(Pmid, annotations.get(Pmid)+line); - } - else if(line.length()==0) //Processing - { - count_paragraph=0; - } - } - inputfile.close(); - - /* - * BioC2PubTator - */ - HashMap pmidlist = new HashMap(); // check if appear duplicate pmids - boolean duplicate = false; - BufferedWriter PubTatorOutputFormat = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output), "UTF-8")); - ConnectorWoodstox connector = new ConnectorWoodstox(); - BioCCollection collection = new BioCCollection(); - collection = connector.startRead(new InputStreamReader(new FileInputStream(input), "UTF-8")); - while (connector.hasNext()) - { - BioCDocument document = connector.next(); - String PMID = document.getID(); - if(pmidlist.containsKey(PMID)){System.out.println("\nError: duplicate pmid-"+PMID);duplicate = true;} - else{pmidlist.put(PMID,"");} - String Anno=""; - for (BioCPassage passage : document.getPassages()) - { - if(passage.getInfon("type").equals("title") || passage.getInfon("type").equals("t")) - { - PubTatorOutputFormat.write(PMID+"|t|"+ParagraphContent.get(PMID+"\tt")+"\n"); - } - else if(passage.getInfon("type").equals("abstract") || passage.getInfon("type").equals("a")) - { - PubTatorOutputFormat.write(PMID+"|a|"+ParagraphContent.get(PMID+"\ta")+"\n"); - } - else - { - PubTatorOutputFormat.write(PMID+"|"+passage.getInfon("type")+"|"+passage.getText()+"\n"); - } - - for (BioCAnnotation annotation : passage.getAnnotations()) - { - String Annotype = annotation.getInfon("type"); - String Annoid=""; - String Proteinid=""; - if(Annotype.matches("(Gene|FamilyName|DomainMotif)")) - { - if(annotation.getInfons().containsKey("NCBI Gene")) - { - Annoid = annotation.getInfon("NCBI Gene"); - String Annoidlist[]=Annoid.split(";"); - Annoid=""; - for(int x=0;x PassageName= new ArrayList(); // array of Passage name - ArrayList PassageOffset= new ArrayList(); // array of Passage offset - ArrayList PassageContext= new ArrayList(); // array of Passage context - ArrayList> AnnotationInPMID= new ArrayList(); // array of Annotations in the PassageName - - /* - * Per Passage - */ - for (BioCPassage passage : document.getPassages()) - { - PassageName.add(passage.getInfon("type")); //Paragraph - String txt = passage.getText(); - if(txt.matches("[\t ]+")) - { - txt = txt.replaceAll(".","@"); - } - else - { - //if(passage.getInfon("type").toLowerCase().equals("table")) - //{ - // txt=txt.replaceAll(" ", "|"); - //} - txt = txt.replaceAll("ω","w"); - txt = txt.replaceAll("μ","u"); - txt = txt.replaceAll("κ","k"); - txt = txt.replaceAll("α","a"); - txt = txt.replaceAll("γ","g"); - txt = txt.replaceAll("ɣ","g"); - txt = txt.replaceAll("β","b"); - txt = txt.replaceAll("×","x"); - txt = txt.replaceAll("‑","-"); - txt = txt.replaceAll("¹","1"); - txt = txt.replaceAll("²","2"); - txt = txt.replaceAll("°","o"); - txt = txt.replaceAll("ö","o"); - txt = txt.replaceAll("é","e"); - txt = txt.replaceAll("à","a"); - txt = txt.replaceAll("Á","A"); - txt = txt.replaceAll("ε","e"); - txt = txt.replaceAll("θ","O"); - txt = txt.replaceAll("•","."); - txt = txt.replaceAll("µ","u"); - txt = txt.replaceAll("λ","r"); - txt = txt.replaceAll("⁺","+"); - txt = txt.replaceAll("ν","v"); - txt = txt.replaceAll("ï","i"); - txt = txt.replaceAll("ã","a"); - txt = txt.replaceAll("≡","="); - txt = txt.replaceAll("ó","o"); - txt = txt.replaceAll("³","3"); - txt = txt.replaceAll("〖","["); - txt = txt.replaceAll("〗","]"); - txt = txt.replaceAll("Å","A"); - txt = txt.replaceAll("ρ","p"); - txt = txt.replaceAll("ü","u"); - txt = txt.replaceAll("ɛ","e"); - txt = txt.replaceAll("č","c"); - txt = txt.replaceAll("š","s"); - txt = txt.replaceAll("ß","b"); - txt = txt.replaceAll("═","="); - txt = txt.replaceAll("£","L"); - txt = txt.replaceAll("Ł","L"); - txt = txt.replaceAll("ƒ","f"); - txt = txt.replaceAll("ä","a"); - txt = txt.replaceAll("–","-"); - txt = txt.replaceAll("⁻","-"); - txt = txt.replaceAll("〈","<"); - txt = txt.replaceAll("〉",">"); - txt = txt.replaceAll("χ","X"); - txt = txt.replaceAll("Đ","D"); - txt = txt.replaceAll("‰","%"); - txt = txt.replaceAll("·","."); - txt = txt.replaceAll("→",">"); - txt = txt.replaceAll("←","<"); - txt = txt.replaceAll("ζ","z"); - txt = txt.replaceAll("π","p"); - txt = txt.replaceAll("τ","t"); - txt = txt.replaceAll("ξ","X"); - txt = txt.replaceAll("η","h"); - txt = txt.replaceAll("ø","0"); - txt = txt.replaceAll("Δ","D"); - txt = txt.replaceAll("∆","D"); - txt = txt.replaceAll("∑","S"); - txt = txt.replaceAll("Ω","O"); - txt = txt.replaceAll("δ","d"); - txt = txt.replaceAll("σ","s"); - txt = txt.replaceAll("Φ","F"); - //txt = txt.replaceAll("[^\\~\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\_\\+\\{\\}\\|\\:\"\\<\\>\\?\\`\\-\\=\\[\\]\\;\\'\\,\\.\\/\\r\\n0-9a-zA-Z ]"," "); - } - if(passage.getText().equals("") || passage.getText().matches("[ ]+")) - { - PassageContext.add("-notext-"); //Context - } - else - { - PassageContext.add(txt); //Context - } - PassageOffset.add(passage.getOffset()); //Offset - ArrayList AnnotationInPassage= new ArrayList(); // array of Annotations in the PassageName - AnnotationInPMID.add(AnnotationInPassage); - } - PassageNames.add(PassageName); - PassageContexts.add(PassageContext); - PassageOffsets.add(PassageOffset); - Annotations.add(AnnotationInPMID); - } - } - public void BioCReaderWithAnnotation(String input) throws IOException, XMLStreamException - { - ConnectorWoodstox connector = new ConnectorWoodstox(); - BioCCollection collection = new BioCCollection(); - collection = connector.startRead(new InputStreamReader(new FileInputStream(input), "UTF-8")); - - /* - * Per document - */ - while (connector.hasNext()) - { - BioCDocument document = connector.next(); - PMIDs.add(document.getID()); - - ArrayList PassageName= new ArrayList(); // array of Passage name - ArrayList PassageOffset= new ArrayList(); // array of Passage offset - ArrayList PassageContext= new ArrayList(); // array of Passage context - ArrayList> AnnotationInPMID= new ArrayList(); // array of Annotations in the PassageName - - /* - * Per Passage - */ - for (BioCPassage passage : document.getPassages()) - { - PassageName.add(passage.getInfon("type")); //Paragraph - - String txt = passage.getText(); - if(txt.matches("[\t ]+")) - { - txt = txt.replaceAll(".","@"); - } - else - { - //if(passage.getInfon("type").toLowerCase().equals("table")) - //{ - // txt=txt.replaceAll(" ", "|"); - //} - txt = txt.replaceAll("ω","w"); - txt = txt.replaceAll("μ","u"); - txt = txt.replaceAll("κ","k"); - txt = txt.replaceAll("α","a"); - txt = txt.replaceAll("γ","g"); - txt = txt.replaceAll("ɣ","g"); - txt = txt.replaceAll("β","b"); - txt = txt.replaceAll("×","x"); - txt = txt.replaceAll("‑","-"); - txt = txt.replaceAll("¹","1"); - txt = txt.replaceAll("²","2"); - txt = txt.replaceAll("°","o"); - txt = txt.replaceAll("ö","o"); - txt = txt.replaceAll("é","e"); - txt = txt.replaceAll("à","a"); - txt = txt.replaceAll("Á","A"); - txt = txt.replaceAll("ε","e"); - txt = txt.replaceAll("θ","O"); - txt = txt.replaceAll("•","."); - txt = txt.replaceAll("µ","u"); - txt = txt.replaceAll("λ","r"); - txt = txt.replaceAll("⁺","+"); - txt = txt.replaceAll("ν","v"); - txt = txt.replaceAll("ï","i"); - txt = txt.replaceAll("ã","a"); - txt = txt.replaceAll("≡","="); - txt = txt.replaceAll("ó","o"); - txt = txt.replaceAll("³","3"); - txt = txt.replaceAll("〖","["); - txt = txt.replaceAll("〗","]"); - txt = txt.replaceAll("Å","A"); - txt = txt.replaceAll("ρ","p"); - txt = txt.replaceAll("ü","u"); - txt = txt.replaceAll("ɛ","e"); - txt = txt.replaceAll("č","c"); - txt = txt.replaceAll("š","s"); - txt = txt.replaceAll("ß","b"); - txt = txt.replaceAll("═","="); - txt = txt.replaceAll("£","L"); - txt = txt.replaceAll("Ł","L"); - txt = txt.replaceAll("ƒ","f"); - txt = txt.replaceAll("ä","a"); - txt = txt.replaceAll("–","-"); - txt = txt.replaceAll("⁻","-"); - txt = txt.replaceAll("〈","<"); - txt = txt.replaceAll("〉",">"); - txt = txt.replaceAll("χ","X"); - txt = txt.replaceAll("Đ","D"); - txt = txt.replaceAll("‰","%"); - txt = txt.replaceAll("·","."); - txt = txt.replaceAll("→",">"); - txt = txt.replaceAll("←","<"); - txt = txt.replaceAll("ζ","z"); - txt = txt.replaceAll("π","p"); - txt = txt.replaceAll("τ","t"); - txt = txt.replaceAll("ξ","X"); - txt = txt.replaceAll("η","h"); - txt = txt.replaceAll("ø","0"); - txt = txt.replaceAll("Δ","D"); - txt = txt.replaceAll("∆","D"); - txt = txt.replaceAll("∑","S"); - txt = txt.replaceAll("Ω","O"); - txt = txt.replaceAll("δ","d"); - txt = txt.replaceAll("σ","s"); - txt = txt.replaceAll("Φ","F"); - //txt = txt.replaceAll("[^\\~\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\_\\+\\{\\}\\|\\:\"\\<\\>\\?\\`\\-\\=\\[\\]\\;\\'\\,\\.\\/\\r\\n0-9a-zA-Z ]"," "); - } - if(passage.getText().equals("") || passage.getText().matches("[ ]+")) - { - PassageContext.add("-notext-"); //Context - } - else - { - PassageContext.add(txt); //Context - } - PassageOffset.add(passage.getOffset()); //Offset - ArrayList AnnotationInPassage= new ArrayList(); // array of Annotations in the PassageName - - /* - * Per Annotation : - * start - * last - * mention - * type - * id - */ - for (BioCAnnotation Anno : passage.getAnnotations()) - { - int start = Anno.getLocations().get(0).getOffset()-passage.getOffset(); // start - int last = start + Anno.getLocations().get(0).getLength(); // last - String AnnoMention=Anno.getText(); // mention - String Annotype = Anno.getInfon("type"); // type - String Annoid = Anno.getInfon("Identifier"); // identifier | MESH - if(Annoid == null) - { - Annoid = Anno.getInfon("Identifier"); // identifier | MESH - } - if(Annoid == null || Annoid.equals("null")) - { - AnnotationInPassage.add(start+"\t"+last+"\t"+AnnoMention+"\t"+Annotype); //paragraph - } - else - { - AnnotationInPassage.add(start+"\t"+last+"\t"+AnnoMention+"\t"+Annotype+"\t"+Annoid); //paragraph - } - } - AnnotationInPMID.add(AnnotationInPassage); - } - PassageNames.add(PassageName); - PassageContexts.add(PassageContext); - PassageOffsets.add(PassageOffset); - Annotations.add(AnnotationInPMID); - } - } - public void BioCOutput(String input,String output, ArrayList>> Annotations,boolean Final,boolean RemovePreviousAnno) throws IOException, XMLStreamException - { - boolean ShowUnNormalizedMention = false; - if(GNormPlus.setup_hash.containsKey("ShowUnNormalizedMention") && GNormPlus.setup_hash.get("ShowUnNormalizedMention").equals("True")) - { - ShowUnNormalizedMention = true; - } - - BioCDocumentWriter BioCOutputFormat = BioCFactory.newFactory(BioCFactory.WOODSTOX).createBioCDocumentWriter(new OutputStreamWriter(new FileOutputStream(output), "UTF-8")); - BioCCollection biocCollection_input = new BioCCollection(); - BioCCollection biocCollection_output = new BioCCollection(); - - //input: BioC - ConnectorWoodstox connector = new ConnectorWoodstox(); - biocCollection_input = connector.startRead(new InputStreamReader(new FileInputStream(input), "UTF-8")); - BioCOutputFormat.writeCollectionInfo(biocCollection_input); - int i=0; //count for pmid - while (connector.hasNext()) - { - BioCDocument document_output = new BioCDocument(); - BioCDocument document_input = connector.next(); - String PMID=document_input.getID(); - document_output.setID(PMID); - int annotation_count=0; - int j=0; //count for paragraph - for (BioCPassage passage_input : document_input.getPassages()) - { - BioCPassage passage_output = passage_input; - - if(RemovePreviousAnno == true) //clean the previous annotation, if the NER result is provided - { - passage_output.clearAnnotations(); - } - else - { - for (BioCAnnotation annotation : passage_output.getAnnotations()) - { - annotation.setID(""+annotation_count); - annotation_count++; - } - } - - int passage_Offset = passage_input.getOffset(); - String passage_Text = passage_input.getText(); - ArrayList AnnotationInPassage = new ArrayList(); - //ArrayList AnnotationInPassage = Annotations.get(i).get(j); - if(Annotations.size()>i && Annotations.get(i).size()>j) - { - for(int a=0;alast) - { - String mention = Anno[2]; - if(Final == true && passage_Text.length()>=last) - { - mention = passage_Text.substring(start, last); - } - if(mention.matches(".*\t.*")) - { - Anno[3]=Anno[4]; - if(Anno.length>=6) - { - Anno[4]=Anno[5]; - } - } - String type = Anno[3]; - String id = ""; // optional - if(Anno.length>=5){id = Anno[4];} - if(Final == true) - { - for(int b=0;b=lastb) - { - mentionb = passage_Text.substring(startb, lastb); - } - if(mentionb.matches(".*\t.*")) - { - Annob[3]=Annob[4]; - if(Annob.length>=6) - { - Annob[4]=Annob[5]; - } - } - String typeb = Annob[3]; - String idb = ""; // optional - if(Annob.length>=5){idb = Annob[4];} - - if(start == startb && last == lastb && type.equals(typeb)) - { - found = true; - if(id.matches("(Focus|Right|Left|Prefix|GeneID|Tax):[0-9]+") && (!idb.equals(""))) - { - } - else if(idb.matches("(Focus|Right|Left|Prefix|GeneID|Tax):[0-9]+") && (!id.matches("(Focus|Right|Left|Prefix|GeneID|Tax):[0-9]+")) && (!id.equals(""))) - { - AnnotationInPassage.set(b, start+"\t"+last+"\t"+mention+"\t"+type+"\t"+id); - } - else - { - if(id.equals("")) - { - } - else - { - AnnotationInPassage.set(b, start+"\t"+last+"\t"+mention+"\t"+type+"\t"+idb+";"+id); - } - - } - break; - } - } - } - } - if(found == false) - { - AnnotationInPassage.add(Annotations.get(i).get(j).get(a)); - } - } - } - for(int a=0;a id_hash = new HashMap (); - if(Anno.length>=5) - { - int start = Integer.parseInt(Anno[0]); - int last = Integer.parseInt(Anno[1]); - String mention = Anno[2]; - if(Final == true && passage_Text.length()>=last) - { - mention = passage_Text.substring(start, last); - } - if(mention.matches(".*\t.*")) - { - Anno[3]=Anno[4]; - if(Anno.length>=6) - { - Anno[4]=Anno[5]; - } - } - String ids = Anno[4]; - String idlist[]=ids.split(","); - for(int b=0;blast) - { - String mention = Anno[2]; - if(Final == true && passage_Text.length()>=last) - { - mention = passage_Text.substring(start, last); - } - if(mention.matches(".*\t.*")) - { - Anno[3]=Anno[4]; - if(Anno.length>=6) - { - Anno[4]=Anno[5]; - } - } - String type = Anno[3]; - if(type.equals("GeneID")){type="Gene";} - BioCAnnotation biocAnnotation = new BioCAnnotation(); - Map AnnoInfons = new HashMap(); - AnnoInfons.put("type", type); - if(Anno.length>=5) - { - String identifier = Anno[4]; - if(Final == true && ShowUnNormalizedMention==false) - { - if(type.matches("(FamilyName|Domain|Gene)")) - { - Pattern ptmp0 = Pattern.compile("^(Focus|Right|Left|Prefix|GeneID|Tax)\\:([0-9]+)\\|([0-9\\;]+)$"); - Matcher mtmp0 = ptmp0.matcher(identifier); - Pattern ptmp1 = Pattern.compile("^(Focus|Right|Left|Prefix|GeneID|Tax)\\:([0-9]+)\\|([0-9]+)\\-([0-9]+)$"); - Matcher mtmp1 = ptmp1.matcher(identifier); - Pattern ptmp2 = Pattern.compile("^(Focus|Right|Left|Prefix|GeneID|Tax)\\:([0-9]+)$"); - Matcher mtmp2 = ptmp2.matcher(identifier); - Pattern ptmp3 = Pattern.compile("^Homo\\:([0-9]+)$"); - Matcher mtmp3 = ptmp3.matcher(identifier); - if(mtmp0.find()) - { - String Method_SA = mtmp0.group(1); - String TaxonomyID = mtmp0.group(2); - String NCBIGeneID = mtmp0.group(3); - if(GNormPlus.Normalization2Protein_hash.containsKey(NCBIGeneID)) - { - AnnoInfons.put("UniProt", GNormPlus.Normalization2Protein_hash.get(NCBIGeneID)); - } - if(GNormPlus.HomologeneID_hash.containsKey(NCBIGeneID)) - { - AnnoInfons.put("NCBI Homologene", GNormPlus.HomologeneID_hash.get(NCBIGeneID)); - } - AnnoInfons.put("NCBI Gene", NCBIGeneID); - } - else if(mtmp1.find()) - { - String Method_SA = mtmp1.group(1); - String TaxonomyID = mtmp1.group(2); - String NCBIGeneID = mtmp1.group(3); - String HomoID = mtmp1.group(4); - if(GNormPlus.Normalization2Protein_hash.containsKey(NCBIGeneID)) - { - AnnoInfons.put("UniProt", GNormPlus.Normalization2Protein_hash.get(NCBIGeneID)); - } - if(GNormPlus.HomologeneID_hash.containsKey(NCBIGeneID)) - { - AnnoInfons.put("NCBI Homologene", GNormPlus.HomologeneID_hash.get(NCBIGeneID)); - } - AnnoInfons.put("NCBI Gene", NCBIGeneID); - } - else if(mtmp2.find()) - { - String Method_SA = mtmp2.group(1); - String TaxonomyID = mtmp2.group(2); - AnnoInfons.put("FocusSpecies", "NCBITaxonomyID:"+TaxonomyID); - } - else if(mtmp3.find()) - { - String Method_SA = mtmp3.group(1); - String HomoID = mtmp3.group(2); - AnnoInfons.put("NCBI Homologene", HomoID); - } - else - { - String identifiers[] = identifier.split(";"); - if(identifiers.length>1) - { - ArrayList identifierSTR = new ArrayList(); - ArrayList ProteinidSTR = new ArrayList(); - ArrayList HomoidSTR = new ArrayList(); - for(int idi=0;idi PMIDs=new ArrayList(); // Type: PMIDs + public ArrayList> PassageNames = new ArrayList(); // PassageName + public ArrayList> PassageOffsets = new ArrayList(); // PassageOffset + public ArrayList> PassageContexts = new ArrayList(); // PassageContext + public ArrayList>> Annotations = new ArrayList(); // Annotation - GNormPlus + + public String BioCFormatCheck(String InputFile) throws IOException + { + + ConnectorWoodstox connector = new ConnectorWoodstox(); + BioCCollection collection = new BioCCollection(); + try + { + collection = connector.startRead(new InputStreamReader(new FileInputStream(InputFile), "UTF-8")); + } + catch (UnsupportedEncodingException | FileNotFoundException | XMLStreamException e) + { + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(InputFile), "UTF-8")); + String line=""; + String status=""; + String Pmid = ""; + boolean tiabs=false; + Pattern patt = Pattern.compile("^([^\\|\\t]+)\\|([^\\|\\t]+)\\|(.*)$"); + while ((line = br.readLine()) != null) + { + Matcher mat = patt.matcher(line); + if(mat.find()) //Title|Abstract + { + if(Pmid.equals("")) + { + Pmid = mat.group(1); + } + else if(!Pmid.equals(mat.group(1))) + { + return "[Error]: "+InputFile+" - A blank is needed between "+Pmid+" and "+mat.group(1)+"."; + } + status = "tiabs"; + tiabs = true; + } + else if (line.contains("\t")) //Annotation + { + } + else if(line.length()==0) //Processing + { + if(status.equals("")) + { + if(Pmid.equals("")) + { + return "[Error]: "+InputFile+" - It's neither BioC nor PubTator format. PMID is empty."; + } + else + { + return "[Error]: "+InputFile+" - A redundant blank is after "+Pmid+"."; + } + } + Pmid=""; + status=""; + } + } + br.close(); + if(tiabs == false) + { + return "[Error]: "+InputFile+" - It's neither BioC nor PubTator format."; + } + if(status.equals("")) + { + return "PubTator"; + } + else + { + return "[Error]: "+InputFile+" - The last column missed a blank."; + } + } + return "BioC"; + } + public void PubTator2BioC(String input,String output) throws IOException, XMLStreamException // Input + { + /* + * PubTator2BioC + */ + String parser = BioCFactory.WOODSTOX; + BioCFactory factory = BioCFactory.newFactory(parser); + BioCDocumentWriter BioCOutputFormat = factory.createBioCDocumentWriter(new OutputStreamWriter(new FileOutputStream(output), "UTF-8")); + BioCCollection biocCollection = new BioCCollection(); + + //time + ZoneId zonedId = ZoneId.of( "America/Montreal" ); + LocalDate today = LocalDate.now( zonedId ); + biocCollection.setDate(today.toString()); + + biocCollection.setKey("BioC.key");//key + biocCollection.setSource("GNormPlus");//source + + BioCOutputFormat.writeCollectionInfo(biocCollection); + BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(input), "UTF-8")); + ArrayList ParagraphType=new ArrayList(); // Type: Title|Abstract + ArrayList ParagraphContent = new ArrayList(); // Text + ArrayList annotations = new ArrayList(); // Annotation + String line; + String Pmid=""; + while ((line = inputfile.readLine()) != null) + { + if(line.contains("|") && !line.contains("\t")) //Title|Abstract + { + String str[]=line.split("\\|",-1); + Pmid=str[0]; + if(str[1].equals("t")) + { + str[1]="title"; + } + if(str[1].equals("a")) + { + str[1]="abstract"; + } + ParagraphType.add(str[1]); + if(str.length==3) + { + String txt = str[2]; + txt = txt.replaceAll("ω","w"); + txt = txt.replaceAll("μ","u"); + txt = txt.replaceAll("κ","k"); + txt = txt.replaceAll("α","a"); + txt = txt.replaceAll("γ","g"); + txt = txt.replaceAll("ɣ","g"); + txt = txt.replaceAll("β","b"); + txt = txt.replaceAll("×","x"); + txt = txt.replaceAll("‑","-"); + txt = txt.replaceAll("¹","1"); + txt = txt.replaceAll("²","2"); + txt = txt.replaceAll("°","o"); + txt = txt.replaceAll("ö","o"); + txt = txt.replaceAll("é","e"); + txt = txt.replaceAll("à","a"); + txt = txt.replaceAll("Á","A"); + txt = txt.replaceAll("ε","e"); + txt = txt.replaceAll("θ","O"); + txt = txt.replaceAll("•","."); + txt = txt.replaceAll("µ","u"); + txt = txt.replaceAll("λ","r"); + txt = txt.replaceAll("⁺","+"); + txt = txt.replaceAll("ν","v"); + txt = txt.replaceAll("ï","i"); + txt = txt.replaceAll("ã","a"); + txt = txt.replaceAll("≡","="); + txt = txt.replaceAll("ó","o"); + txt = txt.replaceAll("³","3"); + txt = txt.replaceAll("〖","["); + txt = txt.replaceAll("〗","]"); + txt = txt.replaceAll("Å","A"); + txt = txt.replaceAll("ρ","p"); + txt = txt.replaceAll("ü","u"); + txt = txt.replaceAll("ɛ","e"); + txt = txt.replaceAll("č","c"); + txt = txt.replaceAll("š","s"); + txt = txt.replaceAll("ß","b"); + txt = txt.replaceAll("═","="); + txt = txt.replaceAll("£","L"); + txt = txt.replaceAll("Ł","L"); + txt = txt.replaceAll("ƒ","f"); + txt = txt.replaceAll("ä","a"); + txt = txt.replaceAll("–","-"); + txt = txt.replaceAll("⁻","-"); + txt = txt.replaceAll("〈","<"); + txt = txt.replaceAll("〉",">"); + txt = txt.replaceAll("χ","X"); + txt = txt.replaceAll("Đ","D"); + txt = txt.replaceAll("‰","%"); + txt = txt.replaceAll("·","."); + txt = txt.replaceAll("→",">"); + txt = txt.replaceAll("←","<"); + txt = txt.replaceAll("ζ","z"); + txt = txt.replaceAll("π","p"); + txt = txt.replaceAll("τ","t"); + txt = txt.replaceAll("ξ","X"); + txt = txt.replaceAll("η","h"); + txt = txt.replaceAll("ø","0"); + txt = txt.replaceAll("Δ","D"); + txt = txt.replaceAll("∆","D"); + txt = txt.replaceAll("∑","S"); + txt = txt.replaceAll("Ω","O"); + txt = txt.replaceAll("δ","d"); + txt = txt.replaceAll("σ","s"); + txt = txt.replaceAll("Φ","F"); + txt = txt.replaceAll("[^\\~\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\_\\+\\{\\}\\|\\:\"\\<\\>\\?\\`\\-\\=\\[\\]\\;\\'\\,\\.\\/\\r\\n0-9a-zA-Z ]"," "); + ParagraphContent.add(txt); + } + else + { + ParagraphContent.add("- No text -"); + } + } + else if (line.contains("\t")) //Annotation + { + String anno[]=line.split("\t"); + if(anno.length==6) + { + annotations.add(anno[1]+"\t"+anno[2]+"\t"+anno[3]+"\t"+anno[4]+"\t"+anno[5]); + } + else if(anno.length==5) + { + annotations.add(anno[1]+"\t"+anno[2]+"\t"+anno[3]+"\t"+anno[4]); + } + } + else if(line.length()==0) //Processing + { + BioCDocument biocDocument = new BioCDocument(); + biocDocument.setID(Pmid); + int startoffset=0; + for(int i=0;i Infons = new HashMap(); + Infons.put("type", ParagraphType.get(i)); + biocPassage.setInfons(Infons); + biocPassage.setText(ParagraphContent.get(i)); + biocPassage.setOffset(startoffset); + startoffset=startoffset+ParagraphContent.get(i).length()+1; + for(int j=0;j=startoffset-ParagraphContent.get(i).length()-1) + { + BioCAnnotation biocAnnotation = new BioCAnnotation(); + Map AnnoInfons = new HashMap(); + if(anno.length==5) + { + AnnoInfons.put("Identifier", anno[4]); + } + AnnoInfons.put("type", anno[3]); + biocAnnotation.setInfons(AnnoInfons); + BioCLocation location = new BioCLocation(); + location.setOffset(Integer.parseInt(anno[0])); + location.setLength(Integer.parseInt(anno[1])-Integer.parseInt(anno[0])); + biocAnnotation.setLocation(location); + biocAnnotation.setText(anno[2]); + biocPassage.addAnnotation(biocAnnotation); + } + } + biocDocument.addPassage(biocPassage); + } + biocCollection.addDocument(biocDocument); + ParagraphType.clear(); + ParagraphContent.clear(); + annotations.clear(); + BioCOutputFormat.writeDocument(biocDocument); + } + } + BioCOutputFormat.close(); + inputfile.close(); + } + public void BioC2PubTator(String input,String output) throws IOException, XMLStreamException //Output + { + /* + * BioC2PubTator + */ + HashMap pmidlist = new HashMap(); // check if appear duplicate pmids + boolean duplicate = false; + BufferedWriter PubTatorOutputFormat = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output), "UTF-8")); + ConnectorWoodstox connector = new ConnectorWoodstox(); + BioCCollection collection = new BioCCollection(); + collection = connector.startRead(new InputStreamReader(new FileInputStream(input), "UTF-8")); + while (connector.hasNext()) + { + BioCDocument document = connector.next(); + String PMID = document.getID(); + if(pmidlist.containsKey(PMID)){System.out.println("\nError: duplicate pmid-"+PMID);duplicate = true;} + else{pmidlist.put(PMID,"");} + String Anno=""; + for (BioCPassage passage : document.getPassages()) + { + if(passage.getInfon("type").equals("title")) + { + PubTatorOutputFormat.write(PMID+"|t|"+passage.getText()+"\n"); + } + else if(passage.getInfon("type").equals("abstract")) + { + PubTatorOutputFormat.write(PMID+"|a|"+passage.getText()+"\n"); + } + else + { + PubTatorOutputFormat.write(PMID+"|"+passage.getInfon("type")+"|"+passage.getText()+"\n"); + } + + for (BioCAnnotation annotation : passage.getAnnotations()) + { + String Annotype = annotation.getInfon("type"); + String Annoid=""; + String Proteinid=""; + if(Annotype.matches("(Gene|FamilyName|DomainMotif)")) + { + if(annotation.getInfons().containsKey("NCBI Gene")) + { + Annoid = annotation.getInfon("NCBI Gene"); + String Annoidlist[]=Annoid.split(";"); + Annoid=""; + for(int x=0;x ParagraphContent = new HashMap(); // [PMID,0] -> title + HashMap annotations = new HashMap(); // PMID ->Annotation + String line; + String Pmid=""; + int count_paragraph=0; + while ((line = inputfile.readLine()) != null) + { + if(line.contains("|") && !line.contains("\t")) //Title|Abstract + { + String str[]=line.split("\\|",-1); + Pmid=str[0]; + ParagraphContent.put(Pmid+"\t"+str[1],str[2]); + count_paragraph++; + } + else if (line.contains("\t")) //Annotation + { + annotations.put(Pmid, annotations.get(Pmid)+line); + } + else if(line.length()==0) //Processing + { + count_paragraph=0; + } + } + inputfile.close(); + + /* + * BioC2PubTator + */ + HashMap pmidlist = new HashMap(); // check if appear duplicate pmids + boolean duplicate = false; + BufferedWriter PubTatorOutputFormat = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output), "UTF-8")); + ConnectorWoodstox connector = new ConnectorWoodstox(); + BioCCollection collection = new BioCCollection(); + collection = connector.startRead(new InputStreamReader(new FileInputStream(input), "UTF-8")); + while (connector.hasNext()) + { + BioCDocument document = connector.next(); + String PMID = document.getID(); + if(pmidlist.containsKey(PMID)){System.out.println("\nError: duplicate pmid-"+PMID);duplicate = true;} + else{pmidlist.put(PMID,"");} + String Anno=""; + for (BioCPassage passage : document.getPassages()) + { + if(passage.getInfon("type").equals("title") || passage.getInfon("type").equals("t")) + { + PubTatorOutputFormat.write(PMID+"|t|"+ParagraphContent.get(PMID+"\tt")+"\n"); + } + else if(passage.getInfon("type").equals("abstract") || passage.getInfon("type").equals("a")) + { + PubTatorOutputFormat.write(PMID+"|a|"+ParagraphContent.get(PMID+"\ta")+"\n"); + } + else + { + PubTatorOutputFormat.write(PMID+"|"+passage.getInfon("type")+"|"+passage.getText()+"\n"); + } + + for (BioCAnnotation annotation : passage.getAnnotations()) + { + String Annotype = annotation.getInfon("type"); + String Annoid=""; + String Proteinid=""; + if(Annotype.matches("(Gene|FamilyName|DomainMotif)")) + { + if(annotation.getInfons().containsKey("NCBI Gene")) + { + Annoid = annotation.getInfon("NCBI Gene"); + String Annoidlist[]=Annoid.split(";"); + Annoid=""; + for(int x=0;x PassageName= new ArrayList(); // array of Passage name + ArrayList PassageOffset= new ArrayList(); // array of Passage offset + ArrayList PassageContext= new ArrayList(); // array of Passage context + ArrayList> AnnotationInPMID= new ArrayList(); // array of Annotations in the PassageName + + /* + * Per Passage + */ + for (BioCPassage passage : document.getPassages()) + { + PassageName.add(passage.getInfon("type")); //Paragraph + String txt = passage.getText(); + if(txt.matches("[\t ]+")) + { + txt = txt.replaceAll(".","@"); + } + else + { + //if(passage.getInfon("type").toLowerCase().equals("table")) + //{ + // txt=txt.replaceAll(" ", "|"); + //} + txt = txt.replaceAll("ω","w"); + txt = txt.replaceAll("μ","u"); + txt = txt.replaceAll("κ","k"); + txt = txt.replaceAll("α","a"); + txt = txt.replaceAll("γ","g"); + txt = txt.replaceAll("ɣ","g"); + txt = txt.replaceAll("β","b"); + txt = txt.replaceAll("×","x"); + txt = txt.replaceAll("‑","-"); + txt = txt.replaceAll("¹","1"); + txt = txt.replaceAll("²","2"); + txt = txt.replaceAll("°","o"); + txt = txt.replaceAll("ö","o"); + txt = txt.replaceAll("é","e"); + txt = txt.replaceAll("à","a"); + txt = txt.replaceAll("Á","A"); + txt = txt.replaceAll("ε","e"); + txt = txt.replaceAll("θ","O"); + txt = txt.replaceAll("•","."); + txt = txt.replaceAll("µ","u"); + txt = txt.replaceAll("λ","r"); + txt = txt.replaceAll("⁺","+"); + txt = txt.replaceAll("ν","v"); + txt = txt.replaceAll("ï","i"); + txt = txt.replaceAll("ã","a"); + txt = txt.replaceAll("≡","="); + txt = txt.replaceAll("ó","o"); + txt = txt.replaceAll("³","3"); + txt = txt.replaceAll("〖","["); + txt = txt.replaceAll("〗","]"); + txt = txt.replaceAll("Å","A"); + txt = txt.replaceAll("ρ","p"); + txt = txt.replaceAll("ü","u"); + txt = txt.replaceAll("ɛ","e"); + txt = txt.replaceAll("č","c"); + txt = txt.replaceAll("š","s"); + txt = txt.replaceAll("ß","b"); + txt = txt.replaceAll("═","="); + txt = txt.replaceAll("£","L"); + txt = txt.replaceAll("Ł","L"); + txt = txt.replaceAll("ƒ","f"); + txt = txt.replaceAll("ä","a"); + txt = txt.replaceAll("–","-"); + txt = txt.replaceAll("⁻","-"); + txt = txt.replaceAll("〈","<"); + txt = txt.replaceAll("〉",">"); + txt = txt.replaceAll("χ","X"); + txt = txt.replaceAll("Đ","D"); + txt = txt.replaceAll("‰","%"); + txt = txt.replaceAll("·","."); + txt = txt.replaceAll("→",">"); + txt = txt.replaceAll("←","<"); + txt = txt.replaceAll("ζ","z"); + txt = txt.replaceAll("π","p"); + txt = txt.replaceAll("τ","t"); + txt = txt.replaceAll("ξ","X"); + txt = txt.replaceAll("η","h"); + txt = txt.replaceAll("ø","0"); + txt = txt.replaceAll("Δ","D"); + txt = txt.replaceAll("∆","D"); + txt = txt.replaceAll("∑","S"); + txt = txt.replaceAll("Ω","O"); + txt = txt.replaceAll("δ","d"); + txt = txt.replaceAll("σ","s"); + txt = txt.replaceAll("Φ","F"); + //txt = txt.replaceAll("[^\\~\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\_\\+\\{\\}\\|\\:\"\\<\\>\\?\\`\\-\\=\\[\\]\\;\\'\\,\\.\\/\\r\\n0-9a-zA-Z ]"," "); + } + if(passage.getText().equals("") || passage.getText().matches("[ ]+")) + { + PassageContext.add("-notext-"); //Context + } + else + { + PassageContext.add(txt); //Context + } + PassageOffset.add(passage.getOffset()); //Offset + ArrayList AnnotationInPassage= new ArrayList(); // array of Annotations in the PassageName + AnnotationInPMID.add(AnnotationInPassage); + } + PassageNames.add(PassageName); + PassageContexts.add(PassageContext); + PassageOffsets.add(PassageOffset); + Annotations.add(AnnotationInPMID); + } + } + public void BioCReaderWithAnnotation(String input) throws IOException, XMLStreamException + { + ConnectorWoodstox connector = new ConnectorWoodstox(); + BioCCollection collection = new BioCCollection(); + collection = connector.startRead(new InputStreamReader(new FileInputStream(input), "UTF-8")); + + /* + * Per document + */ + while (connector.hasNext()) + { + BioCDocument document = connector.next(); + PMIDs.add(document.getID()); + + ArrayList PassageName= new ArrayList(); // array of Passage name + ArrayList PassageOffset= new ArrayList(); // array of Passage offset + ArrayList PassageContext= new ArrayList(); // array of Passage context + ArrayList> AnnotationInPMID= new ArrayList(); // array of Annotations in the PassageName + + /* + * Per Passage + */ + for (BioCPassage passage : document.getPassages()) + { + PassageName.add(passage.getInfon("type")); //Paragraph + + String txt = passage.getText(); + if(txt.matches("[\t ]+")) + { + txt = txt.replaceAll(".","@"); + } + else + { + //if(passage.getInfon("type").toLowerCase().equals("table")) + //{ + // txt=txt.replaceAll(" ", "|"); + //} + txt = txt.replaceAll("ω","w"); + txt = txt.replaceAll("μ","u"); + txt = txt.replaceAll("κ","k"); + txt = txt.replaceAll("α","a"); + txt = txt.replaceAll("γ","g"); + txt = txt.replaceAll("ɣ","g"); + txt = txt.replaceAll("β","b"); + txt = txt.replaceAll("×","x"); + txt = txt.replaceAll("‑","-"); + txt = txt.replaceAll("¹","1"); + txt = txt.replaceAll("²","2"); + txt = txt.replaceAll("°","o"); + txt = txt.replaceAll("ö","o"); + txt = txt.replaceAll("é","e"); + txt = txt.replaceAll("à","a"); + txt = txt.replaceAll("Á","A"); + txt = txt.replaceAll("ε","e"); + txt = txt.replaceAll("θ","O"); + txt = txt.replaceAll("•","."); + txt = txt.replaceAll("µ","u"); + txt = txt.replaceAll("λ","r"); + txt = txt.replaceAll("⁺","+"); + txt = txt.replaceAll("ν","v"); + txt = txt.replaceAll("ï","i"); + txt = txt.replaceAll("ã","a"); + txt = txt.replaceAll("≡","="); + txt = txt.replaceAll("ó","o"); + txt = txt.replaceAll("³","3"); + txt = txt.replaceAll("〖","["); + txt = txt.replaceAll("〗","]"); + txt = txt.replaceAll("Å","A"); + txt = txt.replaceAll("ρ","p"); + txt = txt.replaceAll("ü","u"); + txt = txt.replaceAll("ɛ","e"); + txt = txt.replaceAll("č","c"); + txt = txt.replaceAll("š","s"); + txt = txt.replaceAll("ß","b"); + txt = txt.replaceAll("═","="); + txt = txt.replaceAll("£","L"); + txt = txt.replaceAll("Ł","L"); + txt = txt.replaceAll("ƒ","f"); + txt = txt.replaceAll("ä","a"); + txt = txt.replaceAll("–","-"); + txt = txt.replaceAll("⁻","-"); + txt = txt.replaceAll("〈","<"); + txt = txt.replaceAll("〉",">"); + txt = txt.replaceAll("χ","X"); + txt = txt.replaceAll("Đ","D"); + txt = txt.replaceAll("‰","%"); + txt = txt.replaceAll("·","."); + txt = txt.replaceAll("→",">"); + txt = txt.replaceAll("←","<"); + txt = txt.replaceAll("ζ","z"); + txt = txt.replaceAll("π","p"); + txt = txt.replaceAll("τ","t"); + txt = txt.replaceAll("ξ","X"); + txt = txt.replaceAll("η","h"); + txt = txt.replaceAll("ø","0"); + txt = txt.replaceAll("Δ","D"); + txt = txt.replaceAll("∆","D"); + txt = txt.replaceAll("∑","S"); + txt = txt.replaceAll("Ω","O"); + txt = txt.replaceAll("δ","d"); + txt = txt.replaceAll("σ","s"); + txt = txt.replaceAll("Φ","F"); + //txt = txt.replaceAll("[^\\~\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\_\\+\\{\\}\\|\\:\"\\<\\>\\?\\`\\-\\=\\[\\]\\;\\'\\,\\.\\/\\r\\n0-9a-zA-Z ]"," "); + } + if(passage.getText().equals("") || passage.getText().matches("[ ]+")) + { + PassageContext.add("-notext-"); //Context + } + else + { + PassageContext.add(txt); //Context + } + PassageOffset.add(passage.getOffset()); //Offset + ArrayList AnnotationInPassage= new ArrayList(); // array of Annotations in the PassageName + + /* + * Per Annotation : + * start + * last + * mention + * type + * id + */ + for (BioCAnnotation Anno : passage.getAnnotations()) + { + int start = Anno.getLocations().get(0).getOffset()-passage.getOffset(); // start + int last = start + Anno.getLocations().get(0).getLength(); // last + String AnnoMention=Anno.getText(); // mention + String Annotype = Anno.getInfon("type"); // type + String Annoid = Anno.getInfon("Identifier"); // identifier | MESH + if(Annoid == null) + { + Annoid = Anno.getInfon("Identifier"); // identifier | MESH + } + if(Annoid == null || Annoid.equals("null")) + { + AnnotationInPassage.add(start+"\t"+last+"\t"+AnnoMention+"\t"+Annotype); //paragraph + } + else + { + AnnotationInPassage.add(start+"\t"+last+"\t"+AnnoMention+"\t"+Annotype+"\t"+Annoid); //paragraph + } + } + AnnotationInPMID.add(AnnotationInPassage); + } + PassageNames.add(PassageName); + PassageContexts.add(PassageContext); + PassageOffsets.add(PassageOffset); + Annotations.add(AnnotationInPMID); + } + } + public void BioCOutput(String input,String output, ArrayList>> Annotations,boolean Final,boolean RemovePreviousAnno) throws IOException, XMLStreamException + { + boolean ShowUnNormalizedMention = false; + if(GNormPlus.setup_hash.containsKey("ShowUnNormalizedMention") && GNormPlus.setup_hash.get("ShowUnNormalizedMention").equals("True")) + { + ShowUnNormalizedMention = true; + } + + BioCDocumentWriter BioCOutputFormat = BioCFactory.newFactory(BioCFactory.WOODSTOX).createBioCDocumentWriter(new OutputStreamWriter(new FileOutputStream(output), "UTF-8")); + BioCCollection biocCollection_input = new BioCCollection(); + BioCCollection biocCollection_output = new BioCCollection(); + + //input: BioC + ConnectorWoodstox connector = new ConnectorWoodstox(); + biocCollection_input = connector.startRead(new InputStreamReader(new FileInputStream(input), "UTF-8")); + BioCOutputFormat.writeCollectionInfo(biocCollection_input); + int i=0; //count for pmid + while (connector.hasNext()) + { + BioCDocument document_output = new BioCDocument(); + BioCDocument document_input = connector.next(); + String PMID=document_input.getID(); + document_output.setID(PMID); + int annotation_count=0; + int j=0; //count for paragraph + for (BioCPassage passage_input : document_input.getPassages()) + { + BioCPassage passage_output = passage_input; + + if(RemovePreviousAnno == true) //clean the previous annotation, if the NER result is provided + { + passage_output.clearAnnotations(); + } + else + { + for (BioCAnnotation annotation : passage_output.getAnnotations()) + { + annotation.setID(""+annotation_count); + annotation_count++; + } + } + + int passage_Offset = passage_input.getOffset(); + String passage_Text = passage_input.getText(); + ArrayList AnnotationInPassage = new ArrayList(); + //ArrayList AnnotationInPassage = Annotations.get(i).get(j); + if(Annotations.size()>i && Annotations.get(i).size()>j) + { + for(int a=0;alast) + { + String mention = Anno[2]; + if(Final == true && passage_Text.length()>=last) + { + mention = passage_Text.substring(start, last); + } + if(mention.matches(".*\t.*")) + { + Anno[3]=Anno[4]; + if(Anno.length>=6) + { + Anno[4]=Anno[5]; + } + } + String type = Anno[3]; + String id = ""; // optional + if(Anno.length>=5){id = Anno[4];} + if(Final == true) + { + for(int b=0;b=lastb) + { + mentionb = passage_Text.substring(startb, lastb); + } + if(mentionb.matches(".*\t.*")) + { + Annob[3]=Annob[4]; + if(Annob.length>=6) + { + Annob[4]=Annob[5]; + } + } + String typeb = Annob[3]; + String idb = ""; // optional + if(Annob.length>=5){idb = Annob[4];} + + if(start == startb && last == lastb && type.equals(typeb)) + { + found = true; + if(id.matches("(Focus|Right|Left|Prefix|GeneID|Tax):[0-9]+") && (!idb.equals(""))) + { + } + else if(idb.matches("(Focus|Right|Left|Prefix|GeneID|Tax):[0-9]+") && (!id.matches("(Focus|Right|Left|Prefix|GeneID|Tax):[0-9]+")) && (!id.equals(""))) + { + AnnotationInPassage.set(b, start+"\t"+last+"\t"+mention+"\t"+type+"\t"+id); + } + else + { + if(id.equals("")) + { + } + else + { + AnnotationInPassage.set(b, start+"\t"+last+"\t"+mention+"\t"+type+"\t"+idb+";"+id); + } + + } + break; + } + } + } + } + if(found == false) + { + AnnotationInPassage.add(Annotations.get(i).get(j).get(a)); + } + } + } + for(int a=0;a id_hash = new HashMap (); + if(Anno.length>=5) + { + int start = Integer.parseInt(Anno[0]); + int last = Integer.parseInt(Anno[1]); + String mention = Anno[2]; + if(Final == true && passage_Text.length()>=last) + { + mention = passage_Text.substring(start, last); + } + if(mention.matches(".*\t.*")) + { + Anno[3]=Anno[4]; + if(Anno.length>=6) + { + Anno[4]=Anno[5]; + } + } + String ids = Anno[4]; + String idlist[]=ids.split(","); + for(int b=0;blast) + { + String mention = Anno[2]; + if(Final == true && passage_Text.length()>=last) + { + mention = passage_Text.substring(start, last); + } + if(mention.matches(".*\t.*")) + { + Anno[3]=Anno[4]; + if(Anno.length>=6) + { + Anno[4]=Anno[5]; + } + } + String type = Anno[3]; + if(type.equals("GeneID")){type="Gene";} + BioCAnnotation biocAnnotation = new BioCAnnotation(); + Map AnnoInfons = new HashMap(); + AnnoInfons.put("type", type); + if(Anno.length>=5) + { + String identifier = Anno[4]; + if(Final == true && ShowUnNormalizedMention==false) + { + if(type.matches("(FamilyName|Domain|Gene)")) + { + Pattern ptmp0 = Pattern.compile("^(Focus|Right|Left|Prefix|GeneID|Tax)\\:([0-9]+)\\|([0-9\\;]+)$"); + Matcher mtmp0 = ptmp0.matcher(identifier); + Pattern ptmp1 = Pattern.compile("^(Focus|Right|Left|Prefix|GeneID|Tax)\\:([0-9]+)\\|([0-9]+)\\-([0-9]+)$"); + Matcher mtmp1 = ptmp1.matcher(identifier); + Pattern ptmp2 = Pattern.compile("^(Focus|Right|Left|Prefix|GeneID|Tax)\\:([0-9]+)$"); + Matcher mtmp2 = ptmp2.matcher(identifier); + Pattern ptmp3 = Pattern.compile("^Homo\\:([0-9]+)$"); + Matcher mtmp3 = ptmp3.matcher(identifier); + if(mtmp0.find()) + { + String Method_SA = mtmp0.group(1); + String TaxonomyID = mtmp0.group(2); + String NCBIGeneID = mtmp0.group(3); + if(GNormPlus.Normalization2Protein_hash.containsKey(NCBIGeneID)) + { + AnnoInfons.put("UniProt", GNormPlus.Normalization2Protein_hash.get(NCBIGeneID)); + } + if(GNormPlus.HomologeneID_hash.containsKey(NCBIGeneID)) + { + AnnoInfons.put("NCBI Homologene", GNormPlus.HomologeneID_hash.get(NCBIGeneID)); + } + AnnoInfons.put("NCBI Gene", NCBIGeneID); + } + else if(mtmp1.find()) + { + String Method_SA = mtmp1.group(1); + String TaxonomyID = mtmp1.group(2); + String NCBIGeneID = mtmp1.group(3); + String HomoID = mtmp1.group(4); + if(GNormPlus.Normalization2Protein_hash.containsKey(NCBIGeneID)) + { + AnnoInfons.put("UniProt", GNormPlus.Normalization2Protein_hash.get(NCBIGeneID)); + } + if(GNormPlus.HomologeneID_hash.containsKey(NCBIGeneID)) + { + AnnoInfons.put("NCBI Homologene", GNormPlus.HomologeneID_hash.get(NCBIGeneID)); + } + AnnoInfons.put("NCBI Gene", NCBIGeneID); + } + else if(mtmp2.find()) + { + String Method_SA = mtmp2.group(1); + String TaxonomyID = mtmp2.group(2); + AnnoInfons.put("FocusSpecies", "NCBITaxonomyID:"+TaxonomyID); + } + else if(mtmp3.find()) + { + String Method_SA = mtmp3.group(1); + String HomoID = mtmp3.group(2); + AnnoInfons.put("NCBI Homologene", HomoID); + } + else + { + String identifiers[] = identifier.split(";"); + if(identifiers.length>1) + { + ArrayList identifierSTR = new ArrayList(); + ArrayList ProteinidSTR = new ArrayList(); + ArrayList HomoidSTR = new ArrayList(); + for(int idi=0;idi MatchedTokens_hash = new HashMap(); - private double ScoringFunction(String geneid,HashMap Mention_hash,String LF) - { - /* - * define gene/homo id - */ - - //LF - LF = LF.toLowerCase(); - LF = LF.replaceAll("([0-9])([a-z])", "$1 $2"); - LF = LF.replaceAll("([a-z])([0-9])", "$1 $2"); - LF = LF.replaceAll("([\\W\\-\\_])", " "); - LF = LF.replaceAll("[ ]+", " "); - String LF_tkn[]=LF.split(" "); - int LF_ParticalMatch = 0; - - Pattern ptmp = Pattern.compile("[0-9]+\\-([0-9]+)"); - Matcher mtmp = ptmp.matcher(geneid); - Pattern ptmp2 = Pattern.compile("([0-9]+)"); - Matcher mtmp2 = ptmp.matcher(geneid); - if(mtmp.find()) - { - geneid = "Homo:"+mtmp.group(1); - } - else - { - geneid = "Gene:"+geneid; - } - - if(GNormPlus.GeneScoring_hash.containsKey(geneid)) - { - HashMap TF = new HashMap(); // token i in gene j - HashMap TermFrequency = new HashMap(); - - /* - * Tokens in Query (Gene id lexicon) - */ - String l[]=GNormPlus.GeneScoring_hash.get(geneid).split("\t"); // Gene:2664293 cmk-1,cytidylate-1,kinase-1,mssa-1 0.4096 4 0.0625 1 2.0 - String tkns_Gene[] = l[0].split(","); - for(int i=0;i0){score = score + LF_ParticalMatch;/*System.out.println(geneid+"\t"+LF+"\t"+score);*/} - return score; - } - else - { - //System.out.println("Error: cannot find geneid: "+geneid+" in GeneScoring_hash"); - return 0.0; - } - } - - public void PreProcessing4GN(String Filename,String FilenameBioC) throws IOException, XMLStreamException - { - for (int i = 0; i < GNormPlus.BioCDocobj.Annotations.size(); i++) - { - for (int j = 0; j < GNormPlus.BioCDocobj.Annotations.get(i).size(); j++) - { - for (int k = 0; k < GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); k++) - { - String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); - String start=anno[0]; - String last=anno[1]; - String mentions=anno[2]; - String type=anno[3]; - String id=""; - if(anno.length>=5) - { - id=anno[4]; - } - - if(type.equals("Gene")) - { - String mentionArr[] = mentions.split("\\|"); - boolean update=false; - for(int m=0;m locations = GNormPlus.PT_GeneChromosome.SearchMentionLocation(PassageContext,"ChromosomeLocation"); - for (int k = 0 ; k < locations.size() ; k++) - { - String anno[]=locations.get(k).split("\t"); - //int start= Integer.parseInt(anno[0]); - //int last= Integer.parseInt(anno[1]); - //String mention = anno[2]; - String ids = anno[3]; - //GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\tChromosomeLocation\t"+ids); //paragraph - String IDs[] = ids.split("[\\|,]"); - for(int idcount=0;idcount Species_hash = new HashMap(); - for (int j = 0; j < GNormPlus.BioCDocobj.Annotations.get(i).size(); j++) /** Paragraphs : j */ - { - for (int k = 0; k < GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); k++) /** Annotation : k */ - { - String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); - String mentions=anno[2]; - String type=anno[3]; - if(type.matches("(Species|Genus|Strain|CellLine|Cell)")) - { - Species_hash.put(mentions,""); - } - } - } - - - /* - * Collect Gene mentions : - * - * GeneMention-taxid -> "ID" : geneid - * -> "type" : "Gene" - * -> start1-last1 : "" - * -> start2-last2 : "" - * -> start3-last3 : "" - */ - - String tiabs=""; - for (int j = 0; j < GNormPlus.BioCDocobj.PassageContexts.get(i).size(); j++) /** Paragraphs : j */ - { - tiabs=tiabs+GNormPlus.BioCDocobj.PassageContexts.get(i).get(j).toLowerCase(); - } - HashMap> GeneMention_hash = new HashMap>(); - HashMap Mention_hash = new HashMap(); - for (int j = 0; j < GNormPlus.BioCDocobj.Annotations.get(i).size(); j++) /** Paragraphs : j */ - { - for (int k = 0; k < GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); k++) /** Annotation : k */ - { - String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); - String start=anno[0]; - String last=anno[1]; - String mentions=anno[2]; - String type=anno[3]; - String taxids="Tax:9606"; - - if(anno.length>=5) - { - taxids=anno[4]; - } - String mentions_tmp=mentions.toLowerCase(); - mentions_tmp=mentions_tmp.replaceAll("[\\W\\-\\_]",""); - mentions_tmp=mentions_tmp.replaceAll("[0-9]","0"); - taxids=taxids.replaceAll("(Focus|Right|Left|Prefix|Tax):",""); - if(taxids.equals("")) - { - taxids="9606"; - } - /** Filtering */ - boolean found_filter = false; - if(GNormPlus.Filtering_hash.containsKey(mentions_tmp)) // filtering - { - found_filter=true; - } - - if(found_filter==false) //abbreviation - { - for(String f : GNormPlus.Filtering_WithLongForm_hash.keySet()) - { - if( GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).matches(".*[\\t\\|]"+f+"\tGene.*") || - GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).matches(".*\\t"+f+"\\|[^\t]+\tGene.*") - ) - { - String lf=GNormPlus.Filtering_WithLongForm_hash.get(f); - if(tiabs.matches(".*"+lf+".*")) - { - found_filter=true; - break; - } - } - } - } - - if(found_filter==false) - { - if( GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).matches(".*[\\t\\|][a-z]\tGene.*") || - GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).matches(".*\\t[a-z]\\|[^\t]+\tGene.*") //32171191 Wuhan's - ) - { - found_filter=true; - - } - } - - if(found_filter == false) - { - if(type.matches("Gene")) - { - if(GeneMention_hash.containsKey(mentions+"\t"+taxids)) - { - GeneMention_hash.get(mentions+"\t"+taxids).put(start+"\t"+last,""); - } - else - { - HashMap offset_hash = new HashMap(); - offset_hash.put(start+"\t"+last,""); - GeneMention_hash.put(mentions+"\t"+taxids, offset_hash); - GeneMention_hash.get(mentions+"\t"+taxids).put("type", type); - Mention_hash.put(mentions,"Gene"); - } - } - else if(type.matches("(FamilyName|DomainMotif)")) - { - String GMs[]=mentions.split("\\|"); - for(int g=0;g GuaranteedGene2ID = new HashMap(); - HashMap MultiGene2ID = new HashMap(); - for(String GeneMentionTax : GeneMention_hash.keySet()) - { - String GT[]=GeneMentionTax.split("\\t"); - String mentions=GT[0]; - String taxids=GT[1]; - String GMs[]=mentions.split("\\|"); - - HashMap taxids_hash = new HashMap(); - String taxids_arr[]=taxids.split(","); - for(int t=0;t1) - //{ - // System.out.println(Pmid+"\t"+mention+"\t"+mentions+"\t"+IDstr); - //} - - for(int c=0;c Abbreviation - */ - for(String GeneMentionTax : GeneMention_hash.keySet()) - { - String MT[] = GeneMentionTax.split("\\t"); - if(GNormPlus.PmidLF2Abb_hash.containsKey(Pmid+"\t"+MT[0])) - { - String GeneMentionTax_Abb = GNormPlus.PmidLF2Abb_hash.get(Pmid+"\t"+MT[0]) + "\t" + MT[1]; - if(GeneMention_hash.containsKey(GeneMentionTax_Abb) && GeneMention_hash.get(GeneMentionTax).containsKey("ID")) - { - GeneMention_hash.get(GeneMentionTax_Abb).put("ID", GeneMention_hash.get(GeneMentionTax).get("ID")); - } - } - } - - /* - * Gene id refinement: - * 5. Ranking by scoring function (inference network) - */ - for(String GeneMentionTax : GeneMention_hash.keySet()) - { - if(GeneMention_hash.get(GeneMentionTax).containsKey("ID") && GeneMention_hash.get(GeneMentionTax).get("ID").matches(".+,.+")) - { - String geneids=GeneMention_hash.get(GeneMentionTax).get("ID"); - String geneid[] = geneids.split(","); - - String OutputStyle="Top1"; - if(OutputStyle.equals("Top1")) - { - //only return the best one - double max_score=0.0; - String target_geneid=""; - for(int g=0;gmax_score) - { - max_score=score; - target_geneid=geneid[g]; - } - else if(score == 0.0) - { - //System.out.println(GeneMentionTax); - } - } - GeneMention_hash.get(GeneMentionTax).put("ID", target_geneid); - } - else // "All" - { - //return all geneids - String geneSTR=""; - for(int g=0;g FullName - * - */ - for(String GeneMentionTax : GeneMention_hash.keySet()) - { - String MT[] = GeneMentionTax.split("\\t"); - if(GNormPlus.PmidAbb2LF_hash.containsKey(Pmid+"\t"+MT[0])) - { - String GeneMentionTax_LF = GNormPlus.PmidAbb2LF_hash.get(Pmid+"\t"+MT[0]) + "\t" + MT[1]; - if(GeneMention_hash.containsKey(GeneMentionTax_LF) && GeneMention_hash.get(GeneMentionTax).containsKey("ID")) - { - GeneMention_hash.get(GeneMentionTax_LF).put("ID", GeneMention_hash.get(GeneMentionTax).get("ID")); - } - } - } - - /* - * Gene id refinement: - * 7. The inference network tokens of Abbreviation.ID should contain at least LF tokens - * 8. The short mention should be filtered if not long form support - */ - ArrayList removeGMT = new ArrayList(); - for(String GeneMentionTax : GeneMention_hash.keySet()) - { - String GT[]=GeneMentionTax.split("\\t"); - String mentions=GT[0]; - String tax=GT[1]; - if(GeneMention_hash.get(GeneMentionTax).containsKey("type") && GeneMention_hash.get(GeneMentionTax).get("type").equals("Gene") && GeneMention_hash.get(GeneMentionTax).containsKey("ID")) - { - String type = GeneMention_hash.get(GeneMentionTax).get("type"); - String id = GeneMention_hash.get(GeneMentionTax).get("ID"); - String geneid=""; - Pattern ptmp1 = Pattern.compile("^([0-9]+)\\-([0-9]+)$"); - Pattern ptmp2 = Pattern.compile("^([0-9]+)$"); - Matcher mtmp1 = ptmp1.matcher(id); - Matcher mtmp2 = ptmp2.matcher(id); - //System.out.println(id); - if(mtmp1.find()) - { - geneid = "Homo:"+mtmp1.group(2); - } - else if(mtmp2.find()) - { - geneid = "Gene:"+mtmp2.group(1); - } - - boolean LongFormTknMatch= false; - boolean LongFormExist= true; - if(GNormPlus.GeneScoring_hash.containsKey(geneid)) - { - if(GNormPlus.PmidAbb2LF_lc_hash.containsKey(Pmid+"\t"+mentions.toLowerCase())) - { - /* - * token in lexicon : tkn_lexicon - * token in mention : tkn_mention - */ - String l[]=GNormPlus.GeneScoring_hash.get(geneid).split("\t"); // Gene:2664293 cmk-1,cytidylate-1,kinase-1,mssa-1 0.4096 4 0.0625 1 2.0 - String tkns_Gene[] = l[0].split(","); - ArrayList tkn_lexicon = new ArrayList(); - for(int ti=0;ti=5) - { - taxid_org=anno[4]; - } - String taxids=taxid_org.replaceAll("(Focus|Right|Left|Prefix|Tax):",""); - String GMs[]=mentions.split("\\|"); - - if(GeneMention_hash.containsKey(mentions+"\t"+taxids) && GeneMention_hash.get(mentions+"\t"+taxids).containsKey("TargetTax")) - { - String taxtype=taxid_org.replaceAll(":([0-9,]+)",""); - String taxid=GeneMention_hash.get(mentions+"\t"+taxids).get("TargetTax"); - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mentions+"\t"+type+"\t"+taxtype+":"+taxid); - } - - if(type.equals("Gene")) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k) + "|"); - - - if(GeneMention_hash.containsKey(mentions+"\t"+taxids) && GeneMention_hash.get(mentions+"\t"+taxids).containsKey("ID")) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k) + GeneMention_hash.get(mentions+"\t"+taxids).get("ID") + "," ); - } - else // cannot find appropriate species - { - //System.out.println(mention+"\t"+taxid); - } - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).substring(0, GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).length()-1)); // remove ",$" - } - } - } - - //Extend to all gene mentions - HashMap GeneMentions = new HashMap(); // Extending Gene mentions - HashMap GeneMentionLocation = new HashMap(); // Extending Gene mentions - for(int j=0;j=5) - { - id=anno[4]; - } - if(type.equals("Gene") && id.matches("(Focus|Right|Left|Prefix|Tax)\\:([0-9]+)\\|([0-9]+)\\-([0-9]+)")) - { - GeneMentions.put(mentions.toLowerCase(), id); - for (int s=start ;s<=last;s++) - { - GeneMentionLocation.put(j+"\t"+s,""); - } - } - else if(type.equals("Gene") && id.matches("(Focus|Right|Left|Prefix|Tax)\\:([0-9]+)\\|([0-9]+)")) - { - GeneMentions.put(mentions.toLowerCase(), id); - for (int s=start ;s<=last;s++) - { - GeneMentionLocation.put(j+"\t"+s,""); - } - } - } - } - for(int j=0;ji && GNormPlus.BioCDocobj.PassageContexts.get(i).size()>j) - { - String PassageContexts = " " + GNormPlus.BioCDocobj.PassageContexts.get(i).get(j) + " "; - String PassageContexts_tmp = PassageContexts.toLowerCase(); - for(String gm : GeneMentions.keySet()) - { - String id = GeneMentions.get(gm); - if(gm.length()>=3) - { - gm = gm.replaceAll("[ ]*[\\|]*$", ""); - gm = gm.replaceAll("^[\\|]*[ ]*", ""); - gm = gm.replaceAll("[\\|][\\|]+", "\\|"); - if(!gm.matches("[\\W\\-\\_]*")) - { - gm = gm.replaceAll("([^A-Za-z0-9\\| ])", "\\\\$1"); - Pattern ptmp = Pattern.compile("^(.*[\\W\\-\\_])("+gm+")([\\W\\-\\_].*)$"); - Matcher mtmp = ptmp.matcher(PassageContexts_tmp); - while(mtmp.find()) - { - String pre = mtmp.group(1); - String gmtmp = mtmp.group(2); - String post = mtmp.group(3); - - int start = pre.length()-1; - int last = start+gmtmp.length(); - if(PassageContexts.length()>=last+1) - { - String mention = PassageContexts.substring(start+1,last+1); - if(!GeneMentionLocation.containsKey(j+"\t"+start) && !GeneMentionLocation.containsKey(j+"\t"+last)) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\tGene\t"+id); - } - } - gmtmp = gmtmp.replaceAll(".", "\\@"); - PassageContexts_tmp=pre+""+gmtmp+""+post; - mtmp = ptmp.matcher(PassageContexts_tmp); - } - } - } - } - } - } - - //Apply to FamilyNames - HashMap geneids = new HashMap(); // Extending Gene mentions - for(int j=0;j=5) - { - id=anno[4]; - } - Pattern ptmp0 = Pattern.compile("^(Focus|Right|Left|Prefix|GeneID|Tax)\\:([0-9]+)\\|([0-9]+)$"); - Matcher mtmp0 = ptmp0.matcher(id); - Pattern ptmp1 = Pattern.compile("^(Focus|Right|Left|Prefix|GeneID|Tax)\\:([0-9]+)\\|([0-9]+)\\-([0-9]+)$"); - Matcher mtmp1 = ptmp1.matcher(id); - if(mtmp0.find()) - { - geneids.put(mtmp0.group(3), ""); - } - if(mtmp1.find()) - { - geneids.put(mtmp1.group(3), ""); - } - } - } - } - for(int j=0;j=0 ; k--) // Annotation : k - { - String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); - String mention=anno[2]; - String type=anno[3]; - if(type.matches("(FamilyName|DomainMotif)")) - { - String id="Tax:9606"; - if(anno.length>=5) - { - id=anno[4]; - } - String IDstrs = GNormPlus.PT_FamilyName.MentionMatch(mention); - String IDstr[]=IDstrs.split("\\|"); - String ids=""; - for(int id_i=0;id_i=5) - { - Annotation_k=anno[0]+"\t"+anno[1]+"\t"+anno[2]+"\t"+type+"\t"+anno[4]; - } - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k,Annotation_k+"|"+ids); - } - else - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).remove(k); - } - } - } - } - //Species "*" and "(anti)" removed. - for(int j=0;j=0 ; k--) // Annotation : k - { - String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); - String type=anno[3]; - if(type.equals("Species") || type.equals("Genus") || type.equals("Strain") || type.equals("CellLine") || type.equals("Cell")) - { - String id=anno[4]; - id=id.replaceAll("\\*", ""); - id=id.replaceAll("\\(anti\\)", ""); - String Annotation_k=anno[0]+"\t"+anno[1]+"\t"+anno[2]+"\t"+type+"\t"+id; - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k,Annotation_k); - } - } - } - - for(int j=0;j=0 ; k--) // Annotation : k - { - String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); - int start = Integer.parseInt(anno[0]); - int last = Integer.parseInt(anno[1]); - String mention = anno[2]; - String type = anno[3]; - String id = anno[4]; - if(type.equals("Gene") && Species_hash.containsKey(mention)) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).remove(k); - } - else if(type.equals("Gene") && id.equals("")) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).remove(k); - } - else - { - for (int k1 = GNormPlus.BioCDocobj.Annotations.get(i).get(j).size()-1; k1 >=0 ; k1--) // Annotation : k - { - if(k1 != k) - { - String anno1[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k1).split("\t"); - int start1 = Integer.parseInt(anno1[0]); - int last1 = Integer.parseInt(anno1[1]); - if((start1=last) || (start1<=start && last1>last)) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).remove(k); - break; - } - } - } - } - } - } - } - if(GeneIDMatch == true) - { - //GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,false,true); - } - else - { - GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,true,true); - } - } - /* - * Search Potential GeneID in the Prefix Tree - */ - public ArrayList SearchGeneIDLocation(String Doc) - { - ArrayList location = new ArrayList(); - - String Doc_tmp=" "+Doc+" "; - Pattern ptmp = Pattern.compile("^(.*[^A-Za-z0-9]+)([0-9]+\\S*[A-Za-z]+|[A-Za-z]+\\S*[0-9]+|[0-9]+\\S*[A-Za-z]+\\S*[0-9]+|[A-Za-z]+\\S*[0-9]+\\S*[A-Za-z]+)([^A-Za-z0-9]+.*)$"); - Matcher mtmp = ptmp.matcher(Doc_tmp); - while(mtmp.find()) - { - String str1=mtmp.group(1); - String str2=mtmp.group(2); - String str3=mtmp.group(3); - for(int m=str1.length();m<=(str1.length()+str2.length());m++) - { - int start = str1.length()-1; - int last = start+str2.length(); - String mention = Doc.substring(start, last); - if(!mention.matches(".*[\\'\\;\\[\\]\\+\\*\\\\].*")) - { - if(last-start>6 && (mention.matches(".*\\(.*\\).*") || mention.matches("[^\\(\\)]+")) ) - { - Pattern ptmp1 = Pattern.compile("^(.+[^0-9])([0-9]+)\\-([0-9]+)$"); - Matcher mtmp1 = ptmp1.matcher(mention); - Pattern ptmp2 = Pattern.compile("^(.+[^0-9])([0-9]+)\\-(.+[^0-9])([0-9]+)$"); - Matcher mtmp2 = ptmp2.matcher(mention); - if(mtmp1.find()) - { - String S1 = mtmp1.group(1); - if(mtmp1.group(2).length()<=6 && mtmp1.group(3).length()<=6) - { - int Num1 = Integer.parseInt(mtmp1.group(2)); - int Num2 = Integer.parseInt(mtmp1.group(3)); - String prefix = ""; - Pattern ptmp3 = Pattern.compile("^([0]+)"); - Matcher mtmp3 = ptmp3.matcher(mtmp1.group(2)); - if(mtmp3.find()) - { - prefix = mtmp3.group(1); - } - if(Num2-Num1>0 && (Num2-Num1<=20)) - { - for(int n=Num1;n<=Num2;n++) - { - String StrNum=S1+prefix+n; - if(StrNum.length()>=5) - { - location.add(start+"\t"+last+"\t"+StrNum+"\tGeneID"); - } - } - } - } - } - else if(mtmp2.find()) - { - if(mtmp2.group(2).length()<=6 && mtmp2.group(4).length()<=6) - { - String S1 = mtmp2.group(1); - int Num1 = Integer.parseInt(mtmp2.group(2)); - String S2 = mtmp2.group(3); - int Num2 = Integer.parseInt(mtmp2.group(4)); - if(S1.equals(S2)) - { - String prefix = ""; - Pattern ptmp3 = Pattern.compile("^([0]+)"); - Matcher mtmp3 = ptmp3.matcher(mtmp2.group(2)); - if(mtmp3.find()) - { - prefix = mtmp3.group(1); - } - if(Num2-Num1>0 && (Num2-Num1<=20)) - { - for(int n=Num1;n<=Num2;n++) - { - String StrNum=S1+prefix+n; - if(StrNum.length()>=5) - { - location.add(start+"\t"+last+"\t"+StrNum+"\tGeneID"); - } - } - } - } - } - } - } - location.add(start+"\t"+last+"\t"+mention+"\tGeneID"); - } - } - String men=""; - for(int m=0;m locations = SearchGeneIDLocation(PassageContext); - for (int k = 0 ; k < locations.size() ; k++) - { - String anno[]=locations.get(k).split("\t"); - String mention = anno[2].toLowerCase(); - mention = mention.replaceAll("[\\W\\-\\_]+", ""); - if(GNormPlus.GeneIDs_hash.containsKey(mention)) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(locations.get(k)+"\tGeneID:"+GNormPlus.GeneIDs_hash.get(mention)); //paragraph - } - } - } - } - GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,true,true); - } +/** + * Project: GNormPlus + * Function: Gene Normalization + */ + +package GNormPluslib; + +import bioc.BioCAnnotation; +import bioc.BioCCollection; +import bioc.BioCDocument; +import bioc.BioCLocation; +import bioc.BioCPassage; + +import bioc.io.BioCDocumentWriter; +import bioc.io.BioCFactory; +import bioc.io.woodstox.ConnectorWoodstox; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.text.BreakIterator; +import java.time.LocalDate; +import java.time.ZoneId; +import java.text.DecimalFormat; +import java.math.RoundingMode; + +import javax.xml.stream.XMLStreamException; + +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; + +public class GN +{ + public static HashMap MatchedTokens_hash = new HashMap(); + private double ScoringFunction(String geneid,HashMap Mention_hash,String LF) + { + /* + * define gene/homo id + */ + + //LF + LF = LF.toLowerCase(); + LF = LF.replaceAll("([0-9])([a-z])", "$1 $2"); + LF = LF.replaceAll("([a-z])([0-9])", "$1 $2"); + LF = LF.replaceAll("([\\W\\-\\_])", " "); + LF = LF.replaceAll("[ ]+", " "); + String LF_tkn[]=LF.split(" "); + int LF_ParticalMatch = 0; + + Pattern ptmp = Pattern.compile("[0-9]+\\-([0-9]+)"); + Matcher mtmp = ptmp.matcher(geneid); + Pattern ptmp2 = Pattern.compile("([0-9]+)"); + Matcher mtmp2 = ptmp.matcher(geneid); + if(mtmp.find()) + { + geneid = "Homo:"+mtmp.group(1); + } + else + { + geneid = "Gene:"+geneid; + } + + if(GNormPlus.GeneScoring_hash.containsKey(geneid)) + { + HashMap TF = new HashMap(); // token i in gene j + HashMap TermFrequency = new HashMap(); + + /* + * Tokens in Query (Gene id lexicon) + */ + String l[]=GNormPlus.GeneScoring_hash.get(geneid).split("\t"); // Gene:2664293 cmk-1,cytidylate-1,kinase-1,mssa-1 0.4096 4 0.0625 1 2.0 + String tkns_Gene[] = l[0].split(","); + for(int i=0;i0){score = score + LF_ParticalMatch;/*System.out.println(geneid+"\t"+LF+"\t"+score);*/} + return score; + } + else + { + //System.out.println("Error: cannot find geneid: "+geneid+" in GeneScoring_hash"); + return 0.0; + } + } + + public void PreProcessing4GN(String Filename,String FilenameBioC) throws IOException, XMLStreamException + { + for (int i = 0; i < GNormPlus.BioCDocobj.Annotations.size(); i++) + { + for (int j = 0; j < GNormPlus.BioCDocobj.Annotations.get(i).size(); j++) + { + for (int k = 0; k < GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); k++) + { + String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); + String start=anno[0]; + String last=anno[1]; + String mentions=anno[2]; + String type=anno[3]; + String id=""; + if(anno.length>=5) + { + id=anno[4]; + } + + if(type.equals("Gene")) + { + String mentionArr[] = mentions.split("\\|"); + boolean update=false; + for(int m=0;m locations = GNormPlus.PT_GeneChromosome.SearchMentionLocation(PassageContext,"ChromosomeLocation"); + for (int k = 0 ; k < locations.size() ; k++) + { + String anno[]=locations.get(k).split("\t"); + //int start= Integer.parseInt(anno[0]); + //int last= Integer.parseInt(anno[1]); + //String mention = anno[2]; + String ids = anno[3]; + //GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\tChromosomeLocation\t"+ids); //paragraph + String IDs[] = ids.split("[\\|,]"); + for(int idcount=0;idcount Species_hash = new HashMap(); + for (int j = 0; j < GNormPlus.BioCDocobj.Annotations.get(i).size(); j++) /** Paragraphs : j */ + { + for (int k = 0; k < GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); k++) /** Annotation : k */ + { + String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); + String mentions=anno[2]; + String type=anno[3]; + if(type.matches("(Species|Genus|Strain|CellLine|Cell)")) + { + Species_hash.put(mentions,""); + } + } + } + + + /* + * Collect Gene mentions : + * + * GeneMention-taxid -> "ID" : geneid + * -> "type" : "Gene" + * -> start1-last1 : "" + * -> start2-last2 : "" + * -> start3-last3 : "" + */ + + String tiabs=""; + for (int j = 0; j < GNormPlus.BioCDocobj.PassageContexts.get(i).size(); j++) /** Paragraphs : j */ + { + tiabs=tiabs+GNormPlus.BioCDocobj.PassageContexts.get(i).get(j).toLowerCase(); + } + HashMap> GeneMention_hash = new HashMap>(); + HashMap Mention_hash = new HashMap(); + for (int j = 0; j < GNormPlus.BioCDocobj.Annotations.get(i).size(); j++) /** Paragraphs : j */ + { + for (int k = 0; k < GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); k++) /** Annotation : k */ + { + String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); + String start=anno[0]; + String last=anno[1]; + String mentions=anno[2]; + String type=anno[3]; + String taxids="Tax:9606"; + + if(anno.length>=5) + { + taxids=anno[4]; + } + String mentions_tmp=mentions.toLowerCase(); + mentions_tmp=mentions_tmp.replaceAll("[\\W\\-\\_]",""); + mentions_tmp=mentions_tmp.replaceAll("[0-9]","0"); + taxids=taxids.replaceAll("(Focus|Right|Left|Prefix|Tax):",""); + if(taxids.equals("")) + { + taxids="9606"; + } + /** Filtering */ + boolean found_filter = false; + if(GNormPlus.Filtering_hash.containsKey(mentions_tmp)) // filtering + { + found_filter=true; + } + + if(found_filter==false) //abbreviation + { + for(String f : GNormPlus.Filtering_WithLongForm_hash.keySet()) + { + if( GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).matches(".*[\\t\\|]"+f+"\tGene.*") || + GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).matches(".*\\t"+f+"\\|[^\t]+\tGene.*") + ) + { + String lf=GNormPlus.Filtering_WithLongForm_hash.get(f); + if(tiabs.matches(".*"+lf+".*")) + { + found_filter=true; + break; + } + } + } + } + + if(found_filter==false) + { + if( GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).matches(".*[\\t\\|][a-z]\tGene.*") || + GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).matches(".*\\t[a-z]\\|[^\t]+\tGene.*") //32171191 Wuhan's + ) + { + found_filter=true; + + } + } + + if(found_filter == false) + { + if(type.matches("Gene")) + { + if(GeneMention_hash.containsKey(mentions+"\t"+taxids)) + { + GeneMention_hash.get(mentions+"\t"+taxids).put(start+"\t"+last,""); + } + else + { + HashMap offset_hash = new HashMap(); + offset_hash.put(start+"\t"+last,""); + GeneMention_hash.put(mentions+"\t"+taxids, offset_hash); + GeneMention_hash.get(mentions+"\t"+taxids).put("type", type); + Mention_hash.put(mentions,"Gene"); + } + } + else if(type.matches("(FamilyName|DomainMotif)")) + { + String GMs[]=mentions.split("\\|"); + for(int g=0;g GuaranteedGene2ID = new HashMap(); + HashMap MultiGene2ID = new HashMap(); + for(String GeneMentionTax : GeneMention_hash.keySet()) + { + String GT[]=GeneMentionTax.split("\\t"); + String mentions=GT[0]; + String taxids=GT[1]; + String GMs[]=mentions.split("\\|"); + + HashMap taxids_hash = new HashMap(); + String taxids_arr[]=taxids.split(","); + for(int t=0;t1) + //{ + // System.out.println(Pmid+"\t"+mention+"\t"+mentions+"\t"+IDstr); + //} + + for(int c=0;c Abbreviation + */ + for(String GeneMentionTax : GeneMention_hash.keySet()) + { + String MT[] = GeneMentionTax.split("\\t"); + if(GNormPlus.PmidLF2Abb_hash.containsKey(Pmid+"\t"+MT[0])) + { + String GeneMentionTax_Abb = GNormPlus.PmidLF2Abb_hash.get(Pmid+"\t"+MT[0]) + "\t" + MT[1]; + if(GeneMention_hash.containsKey(GeneMentionTax_Abb) && GeneMention_hash.get(GeneMentionTax).containsKey("ID")) + { + GeneMention_hash.get(GeneMentionTax_Abb).put("ID", GeneMention_hash.get(GeneMentionTax).get("ID")); + } + } + } + + /* + * Gene id refinement: + * 5. Ranking by scoring function (inference network) + */ + for(String GeneMentionTax : GeneMention_hash.keySet()) + { + if(GeneMention_hash.get(GeneMentionTax).containsKey("ID") && GeneMention_hash.get(GeneMentionTax).get("ID").matches(".+,.+")) + { + String geneids=GeneMention_hash.get(GeneMentionTax).get("ID"); + String geneid[] = geneids.split(","); + + String OutputStyle="Top1"; + if(OutputStyle.equals("Top1")) + { + //only return the best one + double max_score=0.0; + String target_geneid=""; + for(int g=0;gmax_score) + { + max_score=score; + target_geneid=geneid[g]; + } + else if(score == 0.0) + { + //System.out.println(GeneMentionTax); + } + } + GeneMention_hash.get(GeneMentionTax).put("ID", target_geneid); + } + else // "All" + { + //return all geneids + String geneSTR=""; + for(int g=0;g FullName + * + */ + for(String GeneMentionTax : GeneMention_hash.keySet()) + { + String MT[] = GeneMentionTax.split("\\t"); + if(GNormPlus.PmidAbb2LF_hash.containsKey(Pmid+"\t"+MT[0])) + { + String GeneMentionTax_LF = GNormPlus.PmidAbb2LF_hash.get(Pmid+"\t"+MT[0]) + "\t" + MT[1]; + if(GeneMention_hash.containsKey(GeneMentionTax_LF) && GeneMention_hash.get(GeneMentionTax).containsKey("ID")) + { + GeneMention_hash.get(GeneMentionTax_LF).put("ID", GeneMention_hash.get(GeneMentionTax).get("ID")); + } + } + } + + /* + * Gene id refinement: + * 7. The inference network tokens of Abbreviation.ID should contain at least LF tokens + * 8. The short mention should be filtered if not long form support + */ + ArrayList removeGMT = new ArrayList(); + for(String GeneMentionTax : GeneMention_hash.keySet()) + { + String GT[]=GeneMentionTax.split("\\t"); + String mentions=GT[0]; + String tax=GT[1]; + if(GeneMention_hash.get(GeneMentionTax).containsKey("type") && GeneMention_hash.get(GeneMentionTax).get("type").equals("Gene") && GeneMention_hash.get(GeneMentionTax).containsKey("ID")) + { + String type = GeneMention_hash.get(GeneMentionTax).get("type"); + String id = GeneMention_hash.get(GeneMentionTax).get("ID"); + String geneid=""; + Pattern ptmp1 = Pattern.compile("^([0-9]+)\\-([0-9]+)$"); + Pattern ptmp2 = Pattern.compile("^([0-9]+)$"); + Matcher mtmp1 = ptmp1.matcher(id); + Matcher mtmp2 = ptmp2.matcher(id); + //System.out.println(id); + if(mtmp1.find()) + { + geneid = "Homo:"+mtmp1.group(2); + } + else if(mtmp2.find()) + { + geneid = "Gene:"+mtmp2.group(1); + } + + boolean LongFormTknMatch= false; + boolean LongFormExist= true; + if(GNormPlus.GeneScoring_hash.containsKey(geneid)) + { + if(GNormPlus.PmidAbb2LF_lc_hash.containsKey(Pmid+"\t"+mentions.toLowerCase())) + { + /* + * token in lexicon : tkn_lexicon + * token in mention : tkn_mention + */ + String l[]=GNormPlus.GeneScoring_hash.get(geneid).split("\t"); // Gene:2664293 cmk-1,cytidylate-1,kinase-1,mssa-1 0.4096 4 0.0625 1 2.0 + String tkns_Gene[] = l[0].split(","); + ArrayList tkn_lexicon = new ArrayList(); + for(int ti=0;ti=5) + { + taxid_org=anno[4]; + } + String taxids=taxid_org.replaceAll("(Focus|Right|Left|Prefix|Tax):",""); + String GMs[]=mentions.split("\\|"); + + if(GeneMention_hash.containsKey(mentions+"\t"+taxids) && GeneMention_hash.get(mentions+"\t"+taxids).containsKey("TargetTax")) + { + String taxtype=taxid_org.replaceAll(":([0-9,]+)",""); + String taxid=GeneMention_hash.get(mentions+"\t"+taxids).get("TargetTax"); + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mentions+"\t"+type+"\t"+taxtype+":"+taxid); + } + + if(type.equals("Gene")) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k) + "|"); + + + if(GeneMention_hash.containsKey(mentions+"\t"+taxids) && GeneMention_hash.get(mentions+"\t"+taxids).containsKey("ID")) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k) + GeneMention_hash.get(mentions+"\t"+taxids).get("ID") + "," ); + } + else // cannot find appropriate species + { + //System.out.println(mention+"\t"+taxid); + } + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).substring(0, GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).length()-1)); // remove ",$" + } + } + } + + //Extend to all gene mentions + HashMap GeneMentions = new HashMap(); // Extending Gene mentions + HashMap GeneMentionLocation = new HashMap(); // Extending Gene mentions + for(int j=0;j=5) + { + id=anno[4]; + } + if(type.equals("Gene") && id.matches("(Focus|Right|Left|Prefix|Tax)\\:([0-9]+)\\|([0-9]+)\\-([0-9]+)")) + { + GeneMentions.put(mentions.toLowerCase(), id); + for (int s=start ;s<=last;s++) + { + GeneMentionLocation.put(j+"\t"+s,""); + } + } + else if(type.equals("Gene") && id.matches("(Focus|Right|Left|Prefix|Tax)\\:([0-9]+)\\|([0-9]+)")) + { + GeneMentions.put(mentions.toLowerCase(), id); + for (int s=start ;s<=last;s++) + { + GeneMentionLocation.put(j+"\t"+s,""); + } + } + } + } + for(int j=0;ji && GNormPlus.BioCDocobj.PassageContexts.get(i).size()>j) + { + String PassageContexts = " " + GNormPlus.BioCDocobj.PassageContexts.get(i).get(j) + " "; + String PassageContexts_tmp = PassageContexts.toLowerCase(); + for(String gm : GeneMentions.keySet()) + { + String id = GeneMentions.get(gm); + if(gm.length()>=3) + { + gm = gm.replaceAll("[ ]*[\\|]*$", ""); + gm = gm.replaceAll("^[\\|]*[ ]*", ""); + gm = gm.replaceAll("[\\|][\\|]+", "\\|"); + if(!gm.matches("[\\W\\-\\_]*")) + { + gm = gm.replaceAll("([^A-Za-z0-9\\| ])", "\\\\$1"); + Pattern ptmp = Pattern.compile("^(.*[\\W\\-\\_])("+gm+")([\\W\\-\\_].*)$"); + Matcher mtmp = ptmp.matcher(PassageContexts_tmp); + while(mtmp.find()) + { + String pre = mtmp.group(1); + String gmtmp = mtmp.group(2); + String post = mtmp.group(3); + + int start = pre.length()-1; + int last = start+gmtmp.length(); + if(PassageContexts.length()>=last+1) + { + String mention = PassageContexts.substring(start+1,last+1); + if(!GeneMentionLocation.containsKey(j+"\t"+start) && !GeneMentionLocation.containsKey(j+"\t"+last)) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\tGene\t"+id); + } + } + gmtmp = gmtmp.replaceAll(".", "\\@"); + PassageContexts_tmp=pre+""+gmtmp+""+post; + mtmp = ptmp.matcher(PassageContexts_tmp); + } + } + } + } + } + } + + //Apply to FamilyNames + HashMap geneids = new HashMap(); // Extending Gene mentions + for(int j=0;j=5) + { + id=anno[4]; + } + Pattern ptmp0 = Pattern.compile("^(Focus|Right|Left|Prefix|GeneID|Tax)\\:([0-9]+)\\|([0-9]+)$"); + Matcher mtmp0 = ptmp0.matcher(id); + Pattern ptmp1 = Pattern.compile("^(Focus|Right|Left|Prefix|GeneID|Tax)\\:([0-9]+)\\|([0-9]+)\\-([0-9]+)$"); + Matcher mtmp1 = ptmp1.matcher(id); + if(mtmp0.find()) + { + geneids.put(mtmp0.group(3), ""); + } + if(mtmp1.find()) + { + geneids.put(mtmp1.group(3), ""); + } + } + } + } + for(int j=0;j=0 ; k--) // Annotation : k + { + String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); + String mention=anno[2]; + String type=anno[3]; + if(type.matches("(FamilyName|DomainMotif)")) + { + String id="Tax:9606"; + if(anno.length>=5) + { + id=anno[4]; + } + String IDstrs = GNormPlus.PT_FamilyName.MentionMatch(mention); + String IDstr[]=IDstrs.split("\\|"); + String ids=""; + for(int id_i=0;id_i=5) + { + Annotation_k=anno[0]+"\t"+anno[1]+"\t"+anno[2]+"\t"+type+"\t"+anno[4]; + } + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k,Annotation_k+"|"+ids); + } + else + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).remove(k); + } + } + } + } + //Species "*" and "(anti)" removed. + for(int j=0;j=0 ; k--) // Annotation : k + { + String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); + String type=anno[3]; + if(type.equals("Species") || type.equals("Genus") || type.equals("Strain") || type.equals("CellLine") || type.equals("Cell")) + { + String id=anno[4]; + id=id.replaceAll("\\*", ""); + id=id.replaceAll("\\(anti\\)", ""); + String Annotation_k=anno[0]+"\t"+anno[1]+"\t"+anno[2]+"\t"+type+"\t"+id; + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k,Annotation_k); + } + } + } + + for(int j=0;j=0 ; k--) // Annotation : k + { + String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); + int start = Integer.parseInt(anno[0]); + int last = Integer.parseInt(anno[1]); + String mention = anno[2]; + String type = anno[3]; + String id = anno[4]; + if(type.equals("Gene") && Species_hash.containsKey(mention)) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).remove(k); + } + else if(type.equals("Gene") && id.equals("")) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).remove(k); + } + else + { + for (int k1 = GNormPlus.BioCDocobj.Annotations.get(i).get(j).size()-1; k1 >=0 ; k1--) // Annotation : k + { + if(k1 != k) + { + String anno1[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k1).split("\t"); + int start1 = Integer.parseInt(anno1[0]); + int last1 = Integer.parseInt(anno1[1]); + if((start1=last) || (start1<=start && last1>last)) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).remove(k); + break; + } + } + } + } + } + } + } + if(GeneIDMatch == true) + { + //GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,false,true); + } + else + { + GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,true,true); + } + } + /* + * Search Potential GeneID in the Prefix Tree + */ + public ArrayList SearchGeneIDLocation(String Doc) + { + ArrayList location = new ArrayList(); + + String Doc_tmp=" "+Doc+" "; + Pattern ptmp = Pattern.compile("^(.*[^A-Za-z0-9]+)([0-9]+\\S*[A-Za-z]+|[A-Za-z]+\\S*[0-9]+|[0-9]+\\S*[A-Za-z]+\\S*[0-9]+|[A-Za-z]+\\S*[0-9]+\\S*[A-Za-z]+)([^A-Za-z0-9]+.*)$"); + Matcher mtmp = ptmp.matcher(Doc_tmp); + while(mtmp.find()) + { + String str1=mtmp.group(1); + String str2=mtmp.group(2); + String str3=mtmp.group(3); + for(int m=str1.length();m<=(str1.length()+str2.length());m++) + { + int start = str1.length()-1; + int last = start+str2.length(); + String mention = Doc.substring(start, last); + if(!mention.matches(".*[\\'\\;\\[\\]\\+\\*\\\\].*")) + { + if(last-start>6 && (mention.matches(".*\\(.*\\).*") || mention.matches("[^\\(\\)]+")) ) + { + Pattern ptmp1 = Pattern.compile("^(.+[^0-9])([0-9]+)\\-([0-9]+)$"); + Matcher mtmp1 = ptmp1.matcher(mention); + Pattern ptmp2 = Pattern.compile("^(.+[^0-9])([0-9]+)\\-(.+[^0-9])([0-9]+)$"); + Matcher mtmp2 = ptmp2.matcher(mention); + if(mtmp1.find()) + { + String S1 = mtmp1.group(1); + if(mtmp1.group(2).length()<=6 && mtmp1.group(3).length()<=6) + { + int Num1 = Integer.parseInt(mtmp1.group(2)); + int Num2 = Integer.parseInt(mtmp1.group(3)); + String prefix = ""; + Pattern ptmp3 = Pattern.compile("^([0]+)"); + Matcher mtmp3 = ptmp3.matcher(mtmp1.group(2)); + if(mtmp3.find()) + { + prefix = mtmp3.group(1); + } + if(Num2-Num1>0 && (Num2-Num1<=20)) + { + for(int n=Num1;n<=Num2;n++) + { + String StrNum=S1+prefix+n; + if(StrNum.length()>=5) + { + location.add(start+"\t"+last+"\t"+StrNum+"\tGeneID"); + } + } + } + } + } + else if(mtmp2.find()) + { + if(mtmp2.group(2).length()<=6 && mtmp2.group(4).length()<=6) + { + String S1 = mtmp2.group(1); + int Num1 = Integer.parseInt(mtmp2.group(2)); + String S2 = mtmp2.group(3); + int Num2 = Integer.parseInt(mtmp2.group(4)); + if(S1.equals(S2)) + { + String prefix = ""; + Pattern ptmp3 = Pattern.compile("^([0]+)"); + Matcher mtmp3 = ptmp3.matcher(mtmp2.group(2)); + if(mtmp3.find()) + { + prefix = mtmp3.group(1); + } + if(Num2-Num1>0 && (Num2-Num1<=20)) + { + for(int n=Num1;n<=Num2;n++) + { + String StrNum=S1+prefix+n; + if(StrNum.length()>=5) + { + location.add(start+"\t"+last+"\t"+StrNum+"\tGeneID"); + } + } + } + } + } + } + } + location.add(start+"\t"+last+"\t"+mention+"\tGeneID"); + } + } + String men=""; + for(int m=0;m locations = SearchGeneIDLocation(PassageContext); + for (int k = 0 ; k < locations.size() ; k++) + { + String anno[]=locations.get(k).split("\t"); + String mention = anno[2].toLowerCase(); + mention = mention.replaceAll("[\\W\\-\\_]+", ""); + if(GNormPlus.GeneIDs_hash.containsKey(mention)) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(locations.get(k)+"\tGeneID:"+GNormPlus.GeneIDs_hash.get(mention)); //paragraph + } + } + } + } + GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,true,true); + } } \ No newline at end of file diff --git a/src_Java/GNormPluslib/GNR.java b/src_Java/GNormPluslib/GNR.java index 107f2287c6d67281196ad6ea79d5542cf0954a2f..945809d5587b30f9a49d8100d5c7cbae2ae4efa1 100644 --- a/src_Java/GNormPluslib/GNR.java +++ b/src_Java/GNormPluslib/GNR.java @@ -1,1602 +1,1602 @@ -/** - * Project: GNormPlus - * Function: Gene Name Recognition - */ - -package GNormPluslib; - -import java.io.*; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import javax.xml.stream.XMLStreamException; - -import org.tartarus.snowball.SnowballStemmer; -import org.tartarus.snowball.ext.englishStemmer; - -import GNormPluslib.GNormPlus; -import GNormPluslib.BioCDoc; - -public class GNR -{ - /* - * Read BioC files - */ - public void Ab3P(String Filename,String FilenameAbb,String TrainTest) throws XMLStreamException,IOException - { - /** Abbreviation*/ - //BioC -> Abb input - String line=""; - BufferedWriter FileAbb = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(FilenameAbb), "UTF-8")); - for (int i = 0; i < GNormPlus.BioCDocobj.PMIDs.size(); i++) - { - String Pmid = GNormPlus.BioCDocobj.PMIDs.get(i); - String Context=""; - for (int j = 0; j < GNormPlus.BioCDocobj.PassageNames.get(i).size(); j++) - { - String PassageContext=GNormPlus.BioCDocobj.PassageContexts.get(i).get(j); - if(PassageContext.matches(".*\\([^\\(\\)]+,[^\\(\\)]+\\).*")) - { - PassageContext=PassageContext.replaceAll("\\([^\\(\\)]+,[^\\(\\)]+\\)", ""); - } - if(PassageContext.contains("\\(")) - { - Context = Context+PassageContext+" "; - } - } - FileAbb.write(Pmid+"\n"+Context+"\n\n"); - } - FileAbb.close(); - //Abb - File f = new File(FilenameAbb+".out"); - BufferedWriter fr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), "UTF-8")); - Runtime runtime = Runtime.getRuntime(); - String cmd ="./Ab3P "+FilenameAbb+".Abb "+FilenameAbb+".out"; - - String OS=System.getProperty("os.name").toLowerCase(); - if(OS.contains("windows")) - { - cmd ="java -jar bioadi.jar "+FilenameAbb; - } - else //if(OS.contains("nux")||OS.contains("nix")) - { - cmd ="./Ab3P "+FilenameAbb+" "+FilenameAbb+".out"; - //cmd ="java -jar bioadi.jar "+FilenameAbb+" > "+FilenameAbb+".out"; - } - - Process process = runtime.exec(cmd); - InputStream is = process.getInputStream(); - InputStreamReader isr = new InputStreamReader(is, "UTF-8"); - BufferedReader br = new BufferedReader(isr); - line=""; - while ( (line = br.readLine()) != null) - { - fr.write(line); - fr.newLine(); - fr.flush(); - } - is.close(); - isr.close(); - br.close(); - fr.close(); - //Abb output -> Hash - BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(FilenameAbb+".out"), "UTF-8")); - line=""; - String pmid=""; - while ((line = inputfile.readLine()) != null) - { - String patt="^ (.+)\\|(.+)\\|([0-9\\.]+)$"; - Pattern ptmp = Pattern.compile(patt); - Matcher mtmp = ptmp.matcher(line); - if(line.matches("^[0-9]+$")) - { - pmid=line; - } - if(mtmp.find()) - { - String SF = mtmp.group(1); - String LF = mtmp.group(2); - double weight= Double.parseDouble(mtmp.group(3)); - GNormPlus.Pmid2Abb_hash.put(pmid+"\t"+SF, "Abb:SF"); - GNormPlus.Pmid2Abb_hash.put(pmid+"\t"+LF, "Abb:LF"); - GNormPlus.PmidLF2Abb_lc_hash.put(pmid+"\t"+LF.toLowerCase(), SF.toLowerCase()); - GNormPlus.PmidAbb2LF_lc_hash.put(pmid+"\t"+SF.toLowerCase(), LF.toLowerCase()); - GNormPlus.PmidAbb2LF_hash.put(pmid+"\t"+SF, LF); - if(weight >= 0.9) - { - GNormPlus.PmidLF2Abb_hash.put(pmid+"\t"+LF, SF); - } - } - } - inputfile.close(); - } - - public void LoadInputFile(String Filename,String FilenameAbb,String TrainTest) throws XMLStreamException,IOException - { - /** Read BioC file */ - //if(TrainTest.equals("Train")) - //{ - GNormPlus.BioCDocobj.BioCReaderWithAnnotation(Filename); - //} - //else - //{ - // GNormPlus.BioCDocobj.BioCReader(Filename); - //} - - - /** Abbreviation*/ - //BioC -> Abb input - String line=""; - BufferedWriter FileAbb = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(FilenameAbb), "UTF-8")); - for (int i = 0; i < GNormPlus.BioCDocobj.PMIDs.size(); i++) - { - String Pmid = GNormPlus.BioCDocobj.PMIDs.get(i); - String Context="Text:"; - for (int j = 0; j < GNormPlus.BioCDocobj.PassageNames.get(i).size(); j++) - { - String PassageContext=GNormPlus.BioCDocobj.PassageContexts.get(i).get(j); - if(PassageContext.matches(".*\\([^\\(\\)]+,[^\\(\\)]+\\).*")) - { - PassageContext=PassageContext.replaceAll("\\([^\\(\\)]+,[^\\(\\)]+\\)", ""); - } - if(PassageContext.contains("(")) - { - Context = Context+PassageContext+" "; - } - } - FileAbb.write(Pmid+"\n"+Context+"\n\n"); - } - FileAbb.close(); - //Abb - File f = new File(FilenameAbb+".out"); - BufferedWriter fr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), "UTF-8")); - Runtime runtime = Runtime.getRuntime(); - String cmd ="./Ab3P "+FilenameAbb+".Abb "+FilenameAbb+".out"; - - String OS=System.getProperty("os.name").toLowerCase(); - if(OS.contains("windows")) - { - cmd ="java -jar bioadi.jar "+FilenameAbb; - } - else //if(OS.contains("nux")||OS.contains("nix")) - { - cmd ="./Ab3P "+FilenameAbb+" "+FilenameAbb+".out"; - //cmd ="java -jar bioadi.jar "+FilenameAbb+" > "+FilenameAbb+".out"; - } - - Process process = runtime.exec(cmd); - InputStream is = process.getInputStream(); - InputStreamReader isr = new InputStreamReader(is, "UTF-8"); - BufferedReader br = new BufferedReader(isr); - line=""; - while ( (line = br.readLine()) != null) - { - fr.write(line); - fr.newLine(); - fr.flush(); - } - is.close(); - isr.close(); - br.close(); - fr.close(); - //Abb output -> Hash - BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(FilenameAbb+".out"), "UTF-8")); - line=""; - String pmid=""; - while ((line = inputfile.readLine()) != null) - { - String patt="^ (.+)\\|(.+)\\|([0-9\\.]+)$"; - Pattern ptmp = Pattern.compile(patt); - Matcher mtmp = ptmp.matcher(line); - if(line.matches("^[0-9]+$")) - { - pmid=line; - } - if(mtmp.find()) - { - String SF = mtmp.group(1); - String LF = mtmp.group(2); - double weight= Double.parseDouble(mtmp.group(3)); - GNormPlus.Pmid2Abb_hash.put(pmid+"\t"+SF, "Abb:SF"); - GNormPlus.Pmid2Abb_hash.put(pmid+"\t"+LF, "Abb:LF"); - GNormPlus.PmidLF2Abb_lc_hash.put(pmid+"\t"+LF.toLowerCase(), SF.toLowerCase()); - GNormPlus.PmidAbb2LF_lc_hash.put(pmid+"\t"+SF.toLowerCase(), LF.toLowerCase()); - GNormPlus.PmidAbb2LF_hash.put(pmid+"\t"+SF, LF); - if(weight >= 0.9) - { - GNormPlus.PmidLF2Abb_hash.put(pmid+"\t"+LF, SF); - } - } - } - inputfile.close(); - } - - /* - * Feature Extraction - */ - public void FeatureExtraction(String FilenameData,String FilenameLoca,String TrainTest) throws XMLStreamException - { - try - { - /** output files */ - BufferedWriter FileLocation = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(FilenameLoca), "UTF-8")); // .location - BufferedWriter FileData = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(FilenameData), "UTF-8")); // .data - //NLP modules - SnowballStemmer stemmer = new englishStemmer(); - /** PMIDs : i */ - for (int i = 0; i < GNormPlus.BioCDocobj.PMIDs.size(); i++) - { - String Pmid = GNormPlus.BioCDocobj.PMIDs.get(i); - - /** Paragraphs : j */ - for (int j = 0; j < GNormPlus.BioCDocobj.PassageNames.get(i).size(); j++) - { - String PassageName= GNormPlus.BioCDocobj.PassageNames.get(i).get(j); // Passage name - int PassageOffset = GNormPlus.BioCDocobj.PassageOffsets.get(i).get(j); // Passage offset - String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(i).get(j); // Passage context - ArrayList Annotation = GNormPlus.BioCDocobj.Annotations.get(i).get(j); // Annotation - HashMap CTDGene_hash = new HashMap(); - HashMap FamilyName_hash = new HashMap(); - HashMap character_hash = new HashMap(); - HashMap Abbreviation_hash = new HashMap(); - String PassageContext_tmp=" "+PassageContext+" "; - - /** Abbreviation */ - HashMap Abb_sortebylength = new HashMap(); - ArrayList length_list = new ArrayList(); - int countn=0; - for (Object key : GNormPlus.Pmid2Abb_hash.keySet()) - { - String pmid2abb[]=key.toString().split("\t"); - if(Pmid.equals(pmid2abb[0])) - { - Abb_sortebylength.put(pmid2abb[1].length()*100+countn, pmid2abb[1]); - length_list.add(pmid2abb[1].length()*100+countn); - countn++; - } - } - Collections.sort(length_list); - for (int l=length_list.size()-1;l>=0;l--) - { - String AbbLF = Abb_sortebylength.get(length_list.get(l)); - AbbLF=AbbLF.replaceAll("([^A-Za-z0-9@ ])", "\\\\$1"); - AbbLF=AbbLF.replaceAll(" ", "\\[ \\]\\+"); - Pattern ptmp = Pattern.compile("^(.*[^A-Za-z0-9]+)("+AbbLF+")([^A-Za-z0-9]+.*)$"); - Matcher mtmp = ptmp.matcher(PassageContext_tmp); - while(mtmp.find()) - { - String str1=mtmp.group(1); - String str2=mtmp.group(2); - String str3=mtmp.group(3); - for(int m=str1.length();m<=(str1.length()+str2.length());m++) - { - Abbreviation_hash.put((m-1),GNormPlus.Pmid2Abb_hash.get(Pmid+"\t"+Abb_sortebylength.get(length_list.get(l)))); - } - String men=""; - for(int m=0;m locations = GNormPlus.PT_CTDGene.SearchMentionLocation(PassageContext,"CTDGene"); - for (int k = 0 ; k < locations.size() ; k++) - { - String anno[]=locations.get(k).split("\t"); - int start= Integer.parseInt(anno[0]) + PassageOffset; - int last= Integer.parseInt(anno[1]) + PassageOffset; - String mention = anno[2]; - String id = anno[3]; - - CTDGene_hash.put(start,"CTDGene_B"); - CTDGene_hash.put(last,"CTDGene_E"); - for(int s=start+1;s locations_Fname = GNormPlus.PT_FamilyName.SearchMentionLocation(PassageContext,"FamilyName"); - for (int k = 0 ; k < locations_Fname.size() ; k++) - { - String anno[]=locations_Fname.get(k).split("\t"); - int start= Integer.parseInt(anno[0]) + PassageOffset; - int last= Integer.parseInt(anno[1]) + PassageOffset; - String mention = anno[2]; - String id = anno[3]; - - if(!CTDGene_hash.containsKey(start)) - { - FamilyName_hash.put(start,"famplex_B"); - FamilyName_hash.put(last,"famplex_E"); - for(int s=start+1;stokens[p].length() && PassageContext_tmp.substring(tokens[p].length(),tokens[p].length()+1).equals(" ")) - { - WSF="WSF:Gap"; - } - if(p==0) - { - WSB="WSB:1st"; - } - else if(p==tokens.length-1) - { - WSF="WSF:last"; - } - - if(PassageContext_tmp.substring(0,tokens[p].length()).equals(tokens[p])) - { - if(tokens[p].length()>0) - { - /* - * .loca - */ - int start=Offset; - int last=Offset+tokens[p].length(); - String State=""; - if(!character_hash.containsKey(start) || !character_hash.containsKey(last)){} - else if(character_hash.get(start).matches(".*B$")) - { - State=character_hash.get(start); - } - else if(character_hash.get(last).matches(".*E$")) - { - State=character_hash.get(last); - } - else if(character_hash.get(start).matches(".*I$")) - { - State=character_hash.get(start); - } - - if((!tokens[p].equals("\t"))) - { - FileLocation.write(Pmid+"\t"+PassageName+"\t"+j+"\t"+tokens[p]+"\t"+(Offset+1)+"\t"+(Offset+tokens[p].length())+"\t"+State+"\n"); - } - - /* - * .data - */ - - //Abbreviation - String Abb_State="__nil__"; - if(!Abbreviation_hash.containsKey(start) || !Abbreviation_hash.containsKey(last)){Abb_State="__nil__";} - else if(Abbreviation_hash.containsKey(start)) - { - Abb_State=Abbreviation_hash.get(start); - } - - //CTDGene - start=PassageOffset+Offset; - last=PassageOffset+Offset+tokens[p].length(); - String CTDGene_State="__nil__"; - if(!CTDGene_hash.containsKey(start) || !CTDGene_hash.containsKey(last)){CTDGene_State="__nil__";} - else if(CTDGene_hash.get(start).matches(".*B$")) - { - CTDGene_State=CTDGene_hash.get(start); - } - else if(CTDGene_hash.get(last).matches(".*E$")) - { - CTDGene_State=CTDGene_hash.get(last); - } - else if(CTDGene_hash.get(start).matches(".*I$")) - { - CTDGene_State=CTDGene_hash.get(start); - } - - //FamilyName - if(CTDGene_State.equals("__nil__")) - { - start=PassageOffset+Offset; - last=PassageOffset+Offset+tokens[p].length(); - if(!FamilyName_hash.containsKey(start) || !FamilyName_hash.containsKey(last)){} - else if(FamilyName_hash.get(start).matches(".*B$")) - { - CTDGene_State=FamilyName_hash.get(start); - } - else if(FamilyName_hash.get(last).matches(".*E$")) - { - CTDGene_State=FamilyName_hash.get(last); - } - else if(FamilyName_hash.get(start).matches(".*I$")) - { - CTDGene_State=FamilyName_hash.get(start); - } - } - - //stemming - stemmer.setCurrent(tokens[p].toLowerCase()); - stemmer.stem(); - String stem=stemmer.getCurrent(); - - //Number of Numbers [0-9] - String Num_num=""; - String tmp=tokens[p]; - tmp=tmp.replaceAll("[^0-9]",""); - if(tmp.length()>3){Num_num="N:4+";}else{Num_num="N:"+ tmp.length();} - - //Number of Uppercase [A-Z] - String Num_Uc=""; - tmp=tokens[p]; - tmp=tmp.replaceAll("[^A-Z]",""); - if(tmp.length()>3){Num_Uc="U:4+";}else{Num_Uc="U:"+ tmp.length();} - - //Number of Lowercase [a-z] - String Num_lc=""; - tmp=tokens[p]; - tmp=tmp.replaceAll("[^a-z]",""); - if(tmp.length()>3){Num_lc="L:4+";}else{Num_lc="L:"+ tmp.length();} - - //Number of ALL char - String Num_All=""; - if(tokens[p].length()>3){Num_All="A:4+";}else{Num_All="A:"+ tokens[p].length();} - - //specific character (;:,.->+_) - String SpecificC="__nil__"; - if(tokens[p].equals(";") || tokens[p].equals(":") || tokens[p].equals(",") || tokens[p].equals(".") || tokens[p].equals("-") || tokens[p].equals(">") || tokens[p].equals("+") || tokens[p].equals("_")) - { - SpecificC="-SpecificC1-"; - } - else if(tokens[p].equals("(") || tokens[p].equals(")")) - { - SpecificC="-SpecificC2-"; - } - else if(tokens[p].equals("{") || tokens[p].equals("}")) - { - SpecificC="-SpecificC3-"; - } - else if(tokens[p].equals("[") || tokens[p].equals("]")) - { - SpecificC="-SpecificC4-"; - } - else if(tokens[p].equals("\\") || tokens[p].equals("/")) - { - SpecificC="-SpecificC5-"; - } - - //Chemical Prefix/Suffix - String ChemPreSuf="__nil__"; - if(tokens[p].matches(".*(yl|ylidyne|oyl|sulfonyl)")){ChemPreSuf="-CHEMinlineSuffix-";} - else if(tokens[p].matches("(meth|eth|prop|tetracos).*")){ChemPreSuf="-CHEMalkaneStem-";} - else if(tokens[p].matches("(di|tri|tetra).*")){ChemPreSuf="-CHEMsimpleMultiplier-";} - else if(tokens[p].matches("(benzen|pyridin|toluen).*")){ChemPreSuf="-CHEMtrivialRing-";} - else if(tokens[p].matches(".*(one|ol|carboxylic|amide|ate|acid|ium|ylium|ide|uide|iran|olan|inan|pyrid|acrid|amid|keten|formazan|fydrazin)(s|)")){ChemPreSuf="-CHEMsuffix-";} - - - //Mention Type - String MentionType="__nil__"; - /* - if($tmp eq "to" && $CTD_result_hash{$count_token-1} eq "CTD_gene" && $CTD_result_hash{$count_token+1} eq "CTD_gene"){$CTD_result_hash{$count_token}="CTD_gene";} - if($tmp=~/^(or|and|,)$/ && $CTD_result_hash{$count_token-1} eq "CTD_gene" && $CTD_result_hash{$count_token+1} eq "CTD_gene"){$MentionType="-Type_GeneConjunction-";} - elsif($tmp=~/^(or|and|,)$/ && $last_token=~/^(or|and|,)$/ && $CTD_result_hash{$count_token-2} eq "CTD_gene" && $CTD_result_hash{$count_token+1} eq "CTD_gene"){$MentionType="-Type_GeneConjunction-";} - elsif($tmp=~/^(or|and|,)$/ && $next_token=~/^(or|and|,)$/ && $CTD_result_hash{$count_token-1} eq "CTD_gene" && $CTD_result_hash{$count_token+2} eq "CTD_gene"){$MentionType="-Type_GeneConjunction-";} - */ - if(tokens[p].matches("(ytochrome|cytochrome)")){MentionType="-Type_cytochrome-";} - else if(tokens[p].matches(".*target") ){MentionType="-Type_target-";} - else if(tokens[p].matches(".*(irradiation|hybrid|fusion|experiment|gst|est|gap|antigen)") ){MentionType="-Type_ExperimentNoun-";} - else if(tokens[p].matches(".*(disease|disorder|dystrophy|deficiency|syndrome|dysgenesis|cancer|injury|neoplasm|diabetes|diabete)") ){MentionType="-Type_Disease-";} - else if(tokens[p].matches(".*(motif|domain|omain|binding|site|region|sequence|frameshift|finger|box).*") ){MentionType="-Type_DomainMotif-";} - else if(tokens[p].equals("-") && (p0 && tokens[p-1].matches("^[0-9]+$")) ) ){MentionType="-Type_ChromosomeStrain-";} - else if(tokens[p].matches(".*(related|regulated|associated|correlated|reactive).*")){MentionType="-Type_relation-";} - else if(tokens[p].toLowerCase().matches(".*(polymorphism|mutation|deletion|insertion|duplication|genotype|genotypes).*") ){MentionType="-Type_VariationTerms-";} - else if(tokens[p].matches(".*(oxidase|transferase|transferases|kinase|kinese|subunit|unit|receptor|adrenoceptor|transporter|regulator|transcription|antigen|protein|gene|factor|member|molecule|channel|deaminase|spectrin).*") ){MentionType="-Type_suffix-";} - else if(tokens[p].matches("[\\(\\-\\_]") && (p=1){ prefix=tmp.substring(0, 1);}else{prefix="__nil__";} - if(tmp.length()>=2){ prefix=prefix+" "+tmp.substring(0, 2);}else{prefix=prefix+" __nil__";} - if(tmp.length()>=3){ prefix=prefix+" "+tmp.substring(0, 3);}else{prefix=prefix+" __nil__";} - if(tmp.length()>=4){ prefix=prefix+" "+tmp.substring(0, 4);}else{prefix=prefix+" __nil__";} - if(tmp.length()>=5){ prefix=prefix+" "+tmp.substring(0, 5);}else{prefix=prefix+" __nil__";} - - - //suffix - String suffix=""; - tmp=tokens[p]; - if(tmp.length()>=1){ suffix=tmp.substring(tmp.length()-1, tmp.length());}else{suffix="__nil__";} - if(tmp.length()>=2){ suffix=suffix+" "+tmp.substring(tmp.length()-2, tmp.length());}else{suffix=suffix+" __nil__";} - if(tmp.length()>=3){ suffix=suffix+" "+tmp.substring(tmp.length()-3, tmp.length());}else{suffix=suffix+" __nil__";} - if(tmp.length()>=4){ suffix=suffix+" "+tmp.substring(tmp.length()-4, tmp.length());}else{suffix=suffix+" __nil__";} - if(tmp.length()>=5){ suffix=suffix+" "+tmp.substring(tmp.length()-5, tmp.length());}else{suffix=suffix+" __nil__";} - - if(State.equals("")) - { - State="O"; - } - - if((!tokens[p].equals("\t"))) - { - if(TrainTest.equals("Train")) - { - FileData.write(tokens[p]+" "+stem+" "+WSB+" "+WSF+" "+Num_num+" "+Num_Uc+" "+Num_lc+" "+Num_All+" "+SpecificC+" "+ChemPreSuf+" "+MentionType+" "+ProteinSym+" "+prefix+" "+suffix+" "+CTDGene_State+" "+Abb_State+" "+State+"\n"); - } - else - { - FileData.write(tokens[p]+" "+stem+" "+WSB+" "+WSF+" "+Num_num+" "+Num_Uc+" "+Num_lc+" "+Num_All+" "+SpecificC+" "+ChemPreSuf+" "+MentionType+" "+ProteinSym+" "+prefix+" "+suffix+" "+CTDGene_State+" "+Abb_State+"\n"); - } - } - PassageContext_tmp=PassageContext_tmp.substring(tokens[p].length()); // remove the token for the context - Offset=Offset+tokens[p].length(); - } - } - } - if(tokens.length>0) - { - FileLocation.write("\n"); - FileData.write("\n"); - } - } - } - FileLocation.close(); - FileData.close(); - } - catch(IOException e1){ System.out.println("[MR]: Input file is not exist.");} - } - /* - * Testing by CRF++ - */ - public void CRF_test(String model, String FilenameData, String FilenameOutput) throws IOException - { - File f = new File(FilenameOutput); - BufferedWriter fr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), "UTF-8")); - - Runtime runtime = Runtime.getRuntime(); - - String OS=System.getProperty("os.name").toLowerCase(); - - String cmd="./CRF/crf_test -m "+model+" -o "+FilenameOutput+" "+FilenameData; - if(OS.contains("windows")) - { - cmd ="CRF/crf_test -m "+model+" -o "+FilenameOutput+" "+FilenameData; - } - else //if(OS.contains("nux")||OS.contains("nix")) - { - cmd ="./CRF/crf_test -m "+model+" -o "+FilenameOutput+" "+FilenameData; - } - - try { - Process process = runtime.exec(cmd); - InputStream is = process.getInputStream(); - InputStreamReader isr = new InputStreamReader(is, "UTF-8"); - BufferedReader br = new BufferedReader(isr); - String line=""; - while ( (line = br.readLine()) != null) - { - fr.write(line); - fr.newLine(); - fr.flush(); - } - is.close(); - isr.close(); - br.close(); - fr.close(); - } - catch (IOException e) { - System.out.println(e); - runtime.exit(0); - } - } - - public void CRF_test(String model,String FilenameData,String FilenameOutput,String top3) throws IOException - { - File f = new File(FilenameOutput); - BufferedWriter fr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), "UTF-8")); - - Runtime runtime = Runtime.getRuntime(); - - String OS=System.getProperty("os.name").toLowerCase(); - - String cmd="./CRF/crf_test -n 3 -m "+model+" -o "+FilenameOutput+" "+FilenameData; - if(OS.contains("windows")) - { - cmd ="CRF/crf_test -n 3 -m "+model+" -o "+FilenameOutput+" "+FilenameData; - } - else //if(OS.contains("nux")||OS.contains("nix")) - { - cmd ="./CRF/crf_test -n 3 -m "+model+" -o "+FilenameOutput+" "+FilenameData; - } - - try { - Process process = runtime.exec(cmd); - InputStream is = process.getInputStream(); - InputStreamReader isr = new InputStreamReader(is, "UTF-8"); - BufferedReader br = new BufferedReader(isr); - String line=""; - while ( (line = br.readLine()) != null) - { - fr.write(line); - fr.newLine(); - fr.flush(); - } - is.close(); - isr.close(); - br.close(); - fr.close(); - } - catch (IOException e) { - System.out.println(e); - runtime.exit(0); - } - } - - /* - * Learning model by CRF++ - */ - public void CRF_learn(String model, String FilenameData) throws IOException - { - Runtime runtime = Runtime.getRuntime(); - - Process process = null; - String line = null; - InputStream is = null; - InputStreamReader isr = null; - BufferedReader br = null; - - String OS=System.getProperty("os.name").toLowerCase(); - - String cmd="./CRF/crf_learn -f 3 -c 4.0 CRF/template_UB "+FilenameData+" "+model; - if(OS.contains("windows")) - { - cmd ="CRF/crf_learn -f 3 -c 4.0 CRF/template_UB "+FilenameData+" "+model; - } - else //if(OS.contains("nux")||OS.contains("nix")) - { - cmd ="./CRF/crf_learn -f 3 -c 4.0 CRF/template_UB "+FilenameData+" "+model; - } - - try { - process = runtime.exec(cmd); - is = process.getInputStream(); - isr = new InputStreamReader(is, "UTF-8"); - br = new BufferedReader(isr); - while ( (line = br.readLine()) != null) - { - System.out.println(line); - System.out.flush(); - } - is.close(); - isr.close(); - br.close(); - } - catch (IOException e) { - System.out.println(e); - runtime.exit(0); - } - } - - public void ReadCRFresult(String Filename,String FilenameLoca,String FilenameOutput,String FilenameBioC) throws XMLStreamException, IOException - { - /** load CRF output */ - ArrayList outputArr = new ArrayList(); - BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(FilenameOutput), "UTF-8")); - String line; - while ((line = inputfile.readLine()) != null) - { - outputArr.add(line); - } - inputfile.close(); - - /** load location */ - ArrayList locationArr = new ArrayList(); - inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(FilenameLoca), "UTF-8")); - while ((line = inputfile.readLine()) != null) - { - locationArr.add(line); - } - inputfile.close(); - - /** output -> mentions */ - String pmid_last=""; - String paragraph_num_last=""; - String pmid=""; - String paragraph=""; - String paragraph_num=""; - Pattern pat_B = Pattern.compile("((FamilyName|DomainMotif|Gene)_[B])$"); - Pattern pat_IE = Pattern.compile("((FamilyName|DomainMotif|Gene)_[IE])$"); - ArrayList> AnnotationInPMID = new ArrayList(); // array of Annotations in the PMIDs - ArrayList AnnotationInPassage= new ArrayList(); // array of Annotations in the Passage - GNormPlus.BioCDocobj.Annotations = new ArrayList(); - int countPMID=0; - int countPassage=0; - /** outputArr */ - for(int i=0;i3) - { - pmid=locationRow[0]; - paragraph=locationRow[1]; - paragraph_num=locationRow[2]; - } - - if( (!paragraph_num_last.equals("")) && (!paragraph_num.equals(paragraph_num_last)) ) - { - AnnotationInPMID.add(AnnotationInPassage); - AnnotationInPassage = new ArrayList(); - countPassage++; - } - if( (!pmid_last.equals("")) && (!pmid.equals(pmid_last)) ) - { - GNormPlus.BioCDocobj.Annotations.add(AnnotationInPMID); - AnnotationInPMID = new ArrayList(); - countPMID++; - countPassage=0; - } - - boolean F = false; //Flag of Finding - if(locationRow.length>2) - { - Matcher mat = pat_B.matcher(outputsRow[outputsRow.length-1]); // last column : Status - while(mat.find() && locationRow.length==6) - { - MentionType=mat.group(2); - pmid=locationRow[0]; - paragraph_num=locationRow[2]; - int start_tmp=Integer.parseInt(locationRow[4])-1; - int last_tmp=Integer.parseInt(locationRow[5]); - if(start_tmplast){last=last_tmp;} - i++; - F = true; - if(locationArr.get(i).length()>0) - { - outputsRow=outputArr.get(i).split("\\t"); - locationRow=locationArr.get(i).split("\\t"); - mat = pat_IE.matcher(outputsRow[outputsRow.length-1]); - } - else - { - break; - } - } - } - - if(F == true) - { - String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(countPMID).get(countPassage); // Passage context - String Mention = PassageContext.substring(start, last); - String Mention_nospace = Mention.replaceAll("[\\W\\-\\_]", ""); - if(Mention.toLowerCase().matches("(figure|tables|fig|tab|exp\\. [0-9]+).*")){} - else if(Mention.matches("[A-Z][A-Z]s")){} - else if(Mention.matches(".*\\|.*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\;\\,\\'\\/\\\\].*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\(].*") && !Mention.matches(".*[\\)].*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\[].*") && !Mention.matches(".*[\\]].*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\)].*") && !Mention.matches(".*[\\(].*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\]].*") && !Mention.matches(".*[\\[].*")){} - else - { - AnnotationInPassage.add(start+"\t"+last+"\t"+Mention+"\t"+MentionType); - } - i--; - } - - paragraph_num_last=paragraph_num; - pmid_last=pmid; - }// outputArr1 - AnnotationInPMID.add(AnnotationInPassage); - GNormPlus.BioCDocobj.Annotations.add(AnnotationInPMID); - - //GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,false,false); //save in BioC file - } - - public void ReadCRFresult(String Filename,String FilenameLoca,String FilenameOutput,String FilenameBioC,double threshold,double threshold_GeneType) throws XMLStreamException, IOException - { - /** load CRF output */ - ArrayList outputArr1 = new ArrayList(); - ArrayList outputArr2 = new ArrayList(); - ArrayList outputArr3 = new ArrayList(); - ArrayList outputArr1_score = new ArrayList(); - ArrayList outputArr2_score = new ArrayList(); - ArrayList outputArr3_score = new ArrayList(); - BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(FilenameOutput), "UTF-8")); - String line; - int rank=0; - String score=""; - Pattern pat_Rank = Pattern.compile("^# ([0-2]) ([0-9\\.]+)$"); - while ((line = inputfile.readLine()) != null) - { - Matcher mat = pat_Rank.matcher(line); // last column : Status - if(mat.find()) - { - rank = Integer.parseInt(mat.group(1)); - score = mat.group(2); - } - else if(rank == 0) - { - outputArr1.add(line); - outputArr1_score.add(score); - } - else if(rank == 1) - { - outputArr2.add(line); - outputArr2_score.add(score); - } - else if(rank == 2) - { - outputArr3.add(line); - outputArr3_score.add(score); - } - } - inputfile.close(); - - /** load location */ - ArrayList locationArr = new ArrayList(); - inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(FilenameLoca), "UTF-8")); - while ((line = inputfile.readLine()) != null) - { - locationArr.add(line); - } - inputfile.close(); - - /** output -> mentions */ - String pmid_last=""; - String paragraph_num_last=""; - String pmid=""; - String paragraph=""; - String paragraph_num=""; - Pattern pat_B = Pattern.compile("((FamilyName|DomainMotif|Gene)_[B])$"); - Pattern pat_IE = Pattern.compile("((FamilyName|DomainMotif|Gene)_[IE])$"); - ArrayList> AnnotationInPMID = new ArrayList(); // array of Annotations in the PMIDs - ArrayList AnnotationInPassage= new ArrayList(); // array of Annotations in the Passage - GNormPlus.BioCDocobj.Annotations = new ArrayList(); - int countPMID=0; - int countPassage=0; - /** outputArr1 */ - int size_Arr=outputArr1.size(); - if(locationArr.size()3) - { - pmid=locationRow[0]; - paragraph=locationRow[1]; - paragraph_num=locationRow[2]; - } - - boolean F = false; //Flag of Finding - if(outputsRow.length>=1) - { - Matcher mat = pat_B.matcher(outputsRow[outputsRow.length-1]); // last column : Status - while(mat.find() && locationRow.length==6) - { - MentionType=mat.group(2); - pmid=locationRow[0]; - int start_tmp=Integer.parseInt(locationRow[4])-1; - int last_tmp=Integer.parseInt(locationRow[5]); - if(start_tmplast){last=last_tmp;} - i++; - outputsRow=outputArr1.get(i).split("\\t"); - locationRow=locationArr.get(i).split("\\t"); - mat = pat_IE.matcher(outputsRow[outputsRow.length-1]); - F = true; - } - } - - if( (!paragraph_num_last.equals("")) && (!paragraph_num.equals(paragraph_num_last)) ) // paragraph change - { - AnnotationInPMID.add(AnnotationInPassage); - AnnotationInPassage = new ArrayList(); - countPassage++; - } - - if( !pmid.equals(pmid_last) && paragraph_num.equals("0") && paragraph_num_last.equals("0") ) // pmid change (special case : the article only has one paragrpah) - { - AnnotationInPMID.add(AnnotationInPassage); - AnnotationInPassage = new ArrayList(); - GNormPlus.BioCDocobj.Annotations.add(AnnotationInPMID); - AnnotationInPMID = new ArrayList(); - countPMID++; - countPassage=0; - } - else if( (!pmid_last.equals("")) && (!pmid.equals(pmid_last)) ) // pmid change - { - GNormPlus.BioCDocobj.Annotations.add(AnnotationInPMID); - AnnotationInPMID = new ArrayList(); - countPMID++; - countPassage=0; - } - - if(F == true) - { - if(GNormPlus.BioCDocobj.PassageContexts.size()>countPMID && GNormPlus.BioCDocobj.PassageContexts.get(countPMID).size()>countPassage && GNormPlus.BioCDocobj.PassageContexts.get(countPMID).get(countPassage).length()>=last && (last-start)<1000) - { - String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(countPMID).get(countPassage); // Passage context - String Mention = PassageContext.substring(start, last); - String Mention_nospace = Mention.replaceAll("[\\W\\-\\_]", ""); - if(Mention.toLowerCase().matches("(figure|tables|fig|tab|exp\\. [0-9]+).*")){} - else if(Mention.matches("[A-Z][A-Z]s")){} - else if(Mention.matches(".*\\|.*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\;\\,\\'\\/\\\\].*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\(].*") && !Mention.matches(".*[\\)].*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\[].*") && !Mention.matches(".*[\\]].*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\)].*") && !Mention.matches(".*[\\(].*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\]].*") && !Mention.matches(".*[\\[].*")){} - else if((GNormPlus.Abb2Longformtok_hash.containsKey(Mention_nospace.toLowerCase())) && (PassageContext.toLowerCase().matches(".*[\\W\\-\\-]("+GNormPlus.Abb2Longformtok_hash.get(Mention_nospace.toLowerCase())+")[\\W\\-\\-].*"))) - { - //System.out.println(Mention_nospace.toLowerCase()+"\t"+GNormPlus.Abb2Longformtok_hash.get(Mention_nospace.toLowerCase())); - } - else - { - AnnotationInPassage.add(start+"\t"+last+"\t"+Mention+"\t"+MentionType); - } - } - i--; - } - paragraph_num_last=paragraph_num; - pmid_last=pmid; - }// outputArr1 - AnnotationInPMID.add(AnnotationInPassage); - GNormPlus.BioCDocobj.Annotations.add(AnnotationInPMID); - - /** outputArr2 */ - pmid_last=""; - paragraph_num_last=""; - pmid=""; - paragraph=""; - paragraph_num=""; - countPMID=0; - countPassage=0; - size_Arr=outputArr2.size(); - if(locationArr.size()2) - { - pmid=locationRow[0]; - paragraph=locationRow[1]; - paragraph_num=locationRow[2]; - } - - boolean F = false; //Flag of Finding - if(outputsRow.length>=1) - { - Matcher mat = pat_B.matcher(outputsRow[outputsRow.length-1]); // last column : Status - while(mat.find() && locationRow.length==6) - { - MentionType=mat.group(2); - pmid=locationRow[0]; - int start_tmp=Integer.parseInt(locationRow[4])-1; - int last_tmp=Integer.parseInt(locationRow[5]); - if(start_tmplast){last=last_tmp;} - i++; - outputsRow=outputArr2.get(i).split("\\t"); - locationRow=locationArr.get(i).split("\\t"); - mat = pat_IE.matcher(outputsRow[outputsRow.length-1]); - F = true; - } - } - - if( (!paragraph_num_last.equals("")) && (!paragraph_num.equals(paragraph_num_last)) ) // paragraph change - { - countPassage++; - } - - if( !pmid.equals(pmid_last) && paragraph_num.equals("0") && paragraph_num_last.equals("0") ) // pmid change (special case : the article only has one paragrpah) - { - countPMID++; - countPassage=0; - } - else if( (!pmid_last.equals("")) && (!pmid.equals(pmid_last)) ) // pmid change - { - countPMID++; - countPassage=0; - } - - if(F == true) - { - if(GNormPlus.BioCDocobj.PassageContexts.size()>countPMID && GNormPlus.BioCDocobj.PassageContexts.get(countPMID).size()>countPassage && GNormPlus.BioCDocobj.PassageContexts.get(countPMID).get(countPassage).length()>=last && (last-start)<1000) - { - String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(countPMID).get(countPassage); // Passage context - String Mention = PassageContext.substring(start, last); - String Mention_nospace = Mention.replaceAll("[\\W\\-\\_]", ""); - if(Mention.toLowerCase().matches("(figure|tables|fig|tab|exp\\. [0-9]+).*")){} - else if(Mention.matches("[A-Z][A-Z]s")){} - else if(Mention.matches(".*\\|.*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\;\\,\\'\\/\\\\].*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\(].*") && !Mention.matches(".*[\\)].*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\[].*") && !Mention.matches(".*[\\]].*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\)].*") && !Mention.matches(".*[\\(].*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\]].*") && !Mention.matches(".*[\\[].*")){} - else if((GNormPlus.Abb2Longformtok_hash.containsKey(Mention_nospace.toLowerCase())) && (PassageContext.toLowerCase().matches(".*[\\W\\-\\-]("+GNormPlus.Abb2Longformtok_hash.get(Mention_nospace.toLowerCase())+")[\\W\\-\\-].*"))) - { - //System.out.println(Mention_nospace.toLowerCase()+"\t"+GNormPlus.Abb2Longformtok_hash.get(Mention_nospace.toLowerCase())); - } - else if(Double.parseDouble(outputArr2_score.get(i))>threshold) - { - boolean overlap=false; - for(int j=0;jthreshold_GeneType && GNormPlus.BioCDocobj.Annotations.get(countPMID).get(countPassage).get(j).matches(start+"\t"+last+"\t"+Mention_tmp+"\t(FamilyName|DomainMotif)") ) - { - GNormPlus.BioCDocobj.Annotations.get(countPMID).get(countPassage).set(j, start+"\t"+last+"\t"+Mention+"\t"+MentionType); - } - else if( (start>=startj && startstartj && last<=lastj) ) - { - overlap=true; - } - } - if(overlap == false) - { - GNormPlus.BioCDocobj.Annotations.get(countPMID).get(countPassage).add(start+"\t"+last+"\t"+Mention+"\t"+MentionType); - } - } - } - i--; - } - - paragraph_num_last=paragraph_num; - pmid_last=pmid; - }// outputArr2 - - /** outputArr3 */ - pmid_last=""; - paragraph_num_last=""; - pmid=""; - paragraph=""; - paragraph_num=""; - countPMID=0; - countPassage=0; - size_Arr=outputArr3.size(); - if(locationArr.size()2) - { - pmid=locationRow[0]; - paragraph=locationRow[1]; - paragraph_num=locationRow[2]; - } - - boolean F = false; //Flag of Finding - if(outputsRow.length>=1) - { - Matcher mat = pat_B.matcher(outputsRow[outputsRow.length-1]); // last column : Status - while(mat.find() && locationRow.length==6) - { - MentionType=mat.group(2); - pmid=locationRow[0]; - paragraph_num=locationRow[2]; - int start_tmp=Integer.parseInt(locationRow[4])-1; - int last_tmp=Integer.parseInt(locationRow[5]); - if(start_tmplast){last=last_tmp;} - i++; - outputsRow=outputArr3.get(i).split("\\t"); - locationRow=locationArr.get(i).split("\\t"); - mat = pat_IE.matcher(outputsRow[outputsRow.length-1]); - F = true; - } - } - - if( (!paragraph_num_last.equals("")) && (!paragraph_num.equals(paragraph_num_last)) ) // paragraph change - { - countPassage++; - } - - if( !pmid.equals(pmid_last) && paragraph_num.equals("0") && paragraph_num_last.equals("0") ) // pmid change (special case : the article only has one paragrpah) - { - countPMID++; - countPassage=0; - } - else if( (!pmid_last.equals("")) && (!pmid.equals(pmid_last)) ) // pmid change - { - countPMID++; - countPassage=0; - } - - if(F == true) - { - if(GNormPlus.BioCDocobj.PassageContexts.size()>countPMID && GNormPlus.BioCDocobj.PassageContexts.get(countPMID).size()>countPassage && GNormPlus.BioCDocobj.PassageContexts.get(countPMID).get(countPassage).length()>=last && (last-start)<1000) - { - String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(countPMID).get(countPassage); // Passage context - String Mention = PassageContext.substring(start, last); - String Mention_nospace = Mention.replaceAll("[\\W\\-\\_]", ""); - if(Mention.toLowerCase().matches("(figure|tables|fig|tab|exp\\. [0-9]+).*")){} - else if(Mention.matches("[A-Z][A-Z]s")){} - else if(Mention.matches(".*\\|.*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\;\\,\\'\\/\\\\].*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\(].*") && !Mention.matches(".*[\\)].*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\[].*") && !Mention.matches(".*[\\]].*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\)].*") && !Mention.matches(".*[\\(].*")){} - else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\]].*") && !Mention.matches(".*[\\[].*")){} - else if((GNormPlus.Abb2Longformtok_hash.containsKey(Mention_nospace.toLowerCase())) && (PassageContext.toLowerCase().matches(".*[\\W\\-\\-]("+GNormPlus.Abb2Longformtok_hash.get(Mention_nospace.toLowerCase())+")[\\W\\-\\-].*"))) - { - //System.out.println(Mention_nospace.toLowerCase()+"\t"+GNormPlus.Abb2Longformtok_hash.get(Mention_nospace.toLowerCase())); - } - else if(Double.parseDouble(outputArr3_score.get(i))>threshold) - { - boolean overlap=false; - for(int j=0;jthreshold_GeneType && GNormPlus.BioCDocobj.Annotations.get(countPMID).get(countPassage).get(j).matches(start+"\t"+last+"\t"+Mention_tmp+"\t(FamilyName|DomainMotif)") ) - { - GNormPlus.BioCDocobj.Annotations.get(countPMID).get(countPassage).set(j, start+"\t"+last+"\t"+Mention+"\t"+MentionType); - } - else if( (start>=startj && startstartj && last<=lastj) ) - { - overlap=true; - } - } - if(overlap == false) - { - GNormPlus.BioCDocobj.Annotations.get(countPMID).get(countPassage).add(start+"\t"+last+"\t"+Mention+"\t"+MentionType); - } - } - } - i--; - } - - paragraph_num_last=paragraph_num; - pmid_last=pmid; - }// outputArr3 - - //GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,false,false); //save in BioC file - } - - public void PostProcessing(String Filename,String FilenameBioC) throws XMLStreamException, IOException - { - /** Develop Cell | FamilyName | DomainMotif lists */ - String Disease_Suffix="disease|diseases|syndrome|syndromes|tumor|tumour|deficiency|dysgenesis|atrophy|frame|dystrophy"; - String Cell_Suffix="cell|cells"; - String FamilyName_Suffix="disease|diseases|syndrome|syndromes|tumor|tumour|deficiency|dysgenesis|atrophy|frame|dystrophy|frame|factors|family|families|superfamily|superfamilies|subfamily|subfamilies|complex|genes|proteins"; - String DomainMotif_Suffix="domain|motif|domains|motifs|sequences"; - String Strain_Suffix="alpha|beta|gamma|kappa|theta|delta|[A-Ga-g0-9]"; - ArrayList Translate2Family = new ArrayList(); - - for(int i=0;i Mention2Type_Hash = new HashMap(); // for substring detection - Extract all mentions in the target PMID : MentionList - ArrayList GeneMentionPattern = new ArrayList(); // pattern match to extend Gene - HashMap MentionType2Num = new HashMap(); // for frequency calculation - if(GNormPlus.BioCDocobj.PMIDs.size()>=i) - { - String pmid=GNormPlus.BioCDocobj.PMIDs.get(i); - for(int j=0;j RemoveList = new ArrayList(); - for(int k=0;k Family name (TIF & TIF1) */ - boolean SubSt=false; - /* - // GDNFb -> GDNF (not work on 12682085_J_Cell_Biol_2003.xml) - for (String men : Mention2Type_Hash.keySet()) - { - if((!men.equals(mention.toLowerCase())) && men.matches(mention_tmp+"[\\W\\-\\_]*("+Strain_Suffix+")")) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mention+"\tFamilyName"); - if(GNormPlus.PmidLF2Abb_lc_hash.containsKey(GNormPlus.BioCDocobj.PMIDs.get(i)+"\t"+mention.toLowerCase())) - { - Translate2Family.add(GNormPlus.PmidLF2Abb_lc_hash.get(GNormPlus.BioCDocobj.PMIDs.get(i)+"\t"+mention.toLowerCase())); - } - else if(GNormPlus.PmidAbb2LF_lc_hash.containsKey(GNormPlus.BioCDocobj.PMIDs.get(i)+"\t"+mention.toLowerCase())) - { - Translate2Family.add(GNormPlus.PmidAbb2LF_lc_hash.get(GNormPlus.BioCDocobj.PMIDs.get(i)+"\t"+mention.toLowerCase())); - } - SubSt=true; - break; - } - } - */ - if(SubSt == false) - { - int BoundaryLen=15; - if(GNormPlus.BioCDocobj.PassageContexts.get(i).get(j).length() Family/Domain/Cell */ - if( mention.toLowerCase().matches(".*("+Cell_Suffix+")") || SurroundingString.matches("("+Cell_Suffix+")") ) - { - type="Cell"; - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mention+"\t"+type); - } - else if( mention.toLowerCase().matches(".*("+FamilyName_Suffix+")") || SurroundingString.matches("("+FamilyName_Suffix+")") ) - { - type="FamilyName"; - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mention+"\t"+type); - } - else if( mention.toLowerCase().matches(".*("+DomainMotif_Suffix+")")|| SurroundingString.matches("("+DomainMotif_Suffix+")") ) - { - type="DomainMotif"; - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mention+"\t"+type); - } - else if(!type.equals("Gene")) - { - /* 3. Check (Family+Domain+Cell)/All rate (threshold = 0.5) - Family/Domain/Cell -> Gene */ - double Num_FDC=0; - double Num_Gene=0; - if(MentionType2Num.containsKey(mention.toLowerCase()+"\tFamilyName")) - { - Num_FDC = Num_FDC + MentionType2Num.get(mention.toLowerCase()+"\tFamilyName"); - } - if(MentionType2Num.containsKey(mention.toLowerCase()+"\tDomainMotif")) - { - Num_FDC = Num_FDC + MentionType2Num.get(mention.toLowerCase()+"\tDomainMotif"); - } - if(MentionType2Num.containsKey(mention.toLowerCase()+"\tCell")) - { - Num_FDC = Num_FDC + MentionType2Num.get(mention.toLowerCase()+"\tCell"); - } - if(MentionType2Num.containsKey(mention.toLowerCase()+"\tGene")) - { - Num_Gene = Num_Gene + MentionType2Num.get(mention.toLowerCase()+"\tGene"); - } - if(Num_Gene/(Num_FDC+Num_Gene)>=0.5) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mention+"\tGene"); - } - - /* 4. Extend Genes to Family/Domain mentions by pattern match - Family/Domain/Cell -> Gene */ - for(int p=0;p Abb.type - * - Abb only : Abb.type -> LF.type - * - LF only : LF.type -> Abb.type - */ - String lc_ment=mention.toLowerCase(); - if(GNormPlus.PmidAbb2LF_lc_hash.containsKey(pmid+"\t"+lc_ment)) //the target mention is abbreviation - { - //Infer Abbreviation by Long form - if(GNormPlus.PmidAbb2LF_lc_hash.get(pmid+"\t"+lc_ment).matches(".*("+Disease_Suffix+")")) - { - //remove the mention (Abb), because the LF is a disease - } - else if(GNormPlus.PmidAbb2LF_lc_hash.get(pmid+"\t"+lc_ment).matches(".*("+Cell_Suffix+")")) - { - //GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Anno[0]+"\t"+Anno[1]+"\tCell"); - } - else if(GNormPlus.PmidAbb2LF_lc_hash.get(pmid+"\t"+lc_ment).matches(".*("+FamilyName_Suffix+")") && !lc_ment.matches(".+[a-z][0-9][a-z]")) //AtRPA1a in pmid:19153602 - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mention+"\tFamilyName"); - } - else if(GNormPlus.PmidAbb2LF_lc_hash.get(pmid+"\t"+lc_ment).matches(".*("+DomainMotif_Suffix+")")) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mention+"\tDomainMotif"); - } - else - { - if(Mention2Type_Hash.containsKey(GNormPlus.PmidAbb2LF_lc_hash.get(pmid+"\t"+lc_ment)) - && Mention2Type_Hash.get(GNormPlus.PmidAbb2LF_lc_hash.get(pmid+"\t"+lc_ment)).equals("Gene") - && !(type.equals("Gene")) - ) // if Long Form is recognized as a Gene, and Abb is recognized as not a Gene - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mention+"\tGene"); - } - } - } - } //if(Remov == true) - } - } - - for(int j=0;j GeneMentionPattern = new ArrayList(); // pattern match to extend Gene - HashMap GeneMentions = new HashMap(); // Extending Gene mentions - HashMap GeneMentionLocationGNR = new HashMap(); // Extending Gene mentions - for(int j=0;ji && GNormPlus.BioCDocobj.PassageContexts.get(i).size()>j) - { - String PassageContexts = " " + GNormPlus.BioCDocobj.PassageContexts.get(i).get(j) + " "; - String PassageContexts_tmp = PassageContexts.toLowerCase(); - for(String gm : GeneMentions.keySet()) - { - String type=GeneMentions.get(gm); - if(type.equals("Gene")) - { - gm = gm.replaceAll("([\\W\\-\\_])", "\\\\$1"); - gm=gm.replaceAll("[0-9]", "\\[0\\-9\\]"); - gm=gm.replaceAll("(alpha|beta|gamma|theta|zeta|delta)", "(alpha\\|beta\\|gamma\\|theta\\|zeta\\|delta)"); - gm=gm.replaceAll("\\-[a-z]$", "\\-\\[a\\-z\\]"); - Pattern ptmp = Pattern.compile("^(.*[\\W\\-\\_])("+gm+")([\\W\\-\\_].*)$"); - Matcher mtmp = ptmp.matcher(PassageContexts_tmp); - while(mtmp.find()) - { - String pre = mtmp.group(1); - String gmtmp = mtmp.group(2); - String post = mtmp.group(3); - - int start = pre.length()-1; - int last = start+gmtmp.length(); - if(PassageContexts.length()>last) - { - String mention = PassageContexts.substring(start+1,last+1); - if(!GeneMentionLocationGNR.containsKey(j+"\t"+start) && !GeneMentionLocationGNR.containsKey(j+"\t"+last)) - { - if(GNormPlus.BioCDocobj.Annotations.get(i).get(j).contains(start+"\t"+last+"\t"+mention+"\tFamilyName")) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).remove(start+"\t"+last+"\t"+mention+"\tFamilyName"); - } - else if(GNormPlus.BioCDocobj.Annotations.get(i).get(j).contains(start+"\t"+last+"\t"+mention+"\tDomainMotif")) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).remove(start+"\t"+last+"\t"+mention+"\tDomainMotif"); - } - GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\tGene"); - } - gmtmp = gmtmp.replaceAll(".", "X"); - PassageContexts_tmp=pre+""+gmtmp+""+post; - mtmp = ptmp.matcher(PassageContexts_tmp); - } - } - } - } - } - } - - //Extend to all family mentions - for(int j=0;ji && GNormPlus.BioCDocobj.PassageContexts.get(i).size()>j) - { - String PassageContexts = " " + GNormPlus.BioCDocobj.PassageContexts.get(i).get(j) + " "; - String PassageContexts_tmp = PassageContexts.toLowerCase(); - for(String gm : GeneMentions.keySet()) - { - String type=GeneMentions.get(gm); - if(type.matches("(FamilyName|DomainMotif)")) - { - gm = gm.replaceAll("([\\W\\-\\_])", "\\\\$1"); - gm=gm.replaceAll("s$", "(s\\|)"); - Pattern ptmp = Pattern.compile("^(.*[\\W\\-\\_])("+gm+")([\\W\\-\\_].*)$"); - Matcher mtmp = ptmp.matcher(PassageContexts_tmp); - while(mtmp.find()) - { - String pre = mtmp.group(1); - String gmtmp = mtmp.group(2); - String post = mtmp.group(3); - - int start = pre.length()-1; - int last = start+gmtmp.length(); - if(PassageContexts.length()>last) - { - String mention = PassageContexts.substring(start+1,last+1); - if(!GeneMentionLocationGNR.containsKey(j+"\t"+start) && !GeneMentionLocationGNR.containsKey(j+"\t"+last)) - { - if(!GNormPlus.BioCDocobj.Annotations.get(i).get(j).contains(start+"\t"+last+"\t"+mention+"\tGene")) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\t"+type); - } - } - gmtmp = gmtmp.replaceAll(".", "X"); - PassageContexts_tmp=pre+""+gmtmp+""+post; - mtmp = ptmp.matcher(PassageContexts_tmp); - } - } - } - } - } - } - } - } - GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,false,false); //save in BioC file - } -} - - +/** + * Project: GNormPlus + * Function: Gene Name Recognition + */ + +package GNormPluslib; + +import java.io.*; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import javax.xml.stream.XMLStreamException; + +import org.tartarus.snowball.SnowballStemmer; +import org.tartarus.snowball.ext.englishStemmer; + +import GNormPluslib.GNormPlus; +import GNormPluslib.BioCDoc; + +public class GNR +{ + /* + * Read BioC files + */ + public void Ab3P(String Filename,String FilenameAbb,String TrainTest) throws XMLStreamException,IOException + { + /** Abbreviation*/ + //BioC -> Abb input + String line=""; + BufferedWriter FileAbb = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(FilenameAbb), "UTF-8")); + for (int i = 0; i < GNormPlus.BioCDocobj.PMIDs.size(); i++) + { + String Pmid = GNormPlus.BioCDocobj.PMIDs.get(i); + String Context=""; + for (int j = 0; j < GNormPlus.BioCDocobj.PassageNames.get(i).size(); j++) + { + String PassageContext=GNormPlus.BioCDocobj.PassageContexts.get(i).get(j); + if(PassageContext.matches(".*\\([^\\(\\)]+,[^\\(\\)]+\\).*")) + { + PassageContext=PassageContext.replaceAll("\\([^\\(\\)]+,[^\\(\\)]+\\)", ""); + } + if(PassageContext.contains("\\(")) + { + Context = Context+PassageContext+" "; + } + } + FileAbb.write(Pmid+"\n"+Context+"\n\n"); + } + FileAbb.close(); + //Abb + File f = new File(FilenameAbb+".out"); + BufferedWriter fr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), "UTF-8")); + Runtime runtime = Runtime.getRuntime(); + String cmd ="./Ab3P "+FilenameAbb+".Abb "+FilenameAbb+".out"; + + String OS=System.getProperty("os.name").toLowerCase(); + if(OS.contains("windows")) + { + cmd ="java -jar bioadi.jar "+FilenameAbb; + } + else //if(OS.contains("nux")||OS.contains("nix")) + { + cmd ="./Ab3P "+FilenameAbb+" "+FilenameAbb+".out"; + //cmd ="java -jar bioadi.jar "+FilenameAbb+" > "+FilenameAbb+".out"; + } + + Process process = runtime.exec(cmd); + InputStream is = process.getInputStream(); + InputStreamReader isr = new InputStreamReader(is, "UTF-8"); + BufferedReader br = new BufferedReader(isr); + line=""; + while ( (line = br.readLine()) != null) + { + fr.write(line); + fr.newLine(); + fr.flush(); + } + is.close(); + isr.close(); + br.close(); + fr.close(); + //Abb output -> Hash + BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(FilenameAbb+".out"), "UTF-8")); + line=""; + String pmid=""; + while ((line = inputfile.readLine()) != null) + { + String patt="^ (.+)\\|(.+)\\|([0-9\\.]+)$"; + Pattern ptmp = Pattern.compile(patt); + Matcher mtmp = ptmp.matcher(line); + if(line.matches("^[0-9]+$")) + { + pmid=line; + } + if(mtmp.find()) + { + String SF = mtmp.group(1); + String LF = mtmp.group(2); + double weight= Double.parseDouble(mtmp.group(3)); + GNormPlus.Pmid2Abb_hash.put(pmid+"\t"+SF, "Abb:SF"); + GNormPlus.Pmid2Abb_hash.put(pmid+"\t"+LF, "Abb:LF"); + GNormPlus.PmidLF2Abb_lc_hash.put(pmid+"\t"+LF.toLowerCase(), SF.toLowerCase()); + GNormPlus.PmidAbb2LF_lc_hash.put(pmid+"\t"+SF.toLowerCase(), LF.toLowerCase()); + GNormPlus.PmidAbb2LF_hash.put(pmid+"\t"+SF, LF); + if(weight >= 0.9) + { + GNormPlus.PmidLF2Abb_hash.put(pmid+"\t"+LF, SF); + } + } + } + inputfile.close(); + } + + public void LoadInputFile(String Filename,String FilenameAbb,String TrainTest) throws XMLStreamException,IOException + { + /** Read BioC file */ + //if(TrainTest.equals("Train")) + //{ + GNormPlus.BioCDocobj.BioCReaderWithAnnotation(Filename); + //} + //else + //{ + // GNormPlus.BioCDocobj.BioCReader(Filename); + //} + + + /** Abbreviation*/ + //BioC -> Abb input + String line=""; + BufferedWriter FileAbb = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(FilenameAbb), "UTF-8")); + for (int i = 0; i < GNormPlus.BioCDocobj.PMIDs.size(); i++) + { + String Pmid = GNormPlus.BioCDocobj.PMIDs.get(i); + String Context="Text:"; + for (int j = 0; j < GNormPlus.BioCDocobj.PassageNames.get(i).size(); j++) + { + String PassageContext=GNormPlus.BioCDocobj.PassageContexts.get(i).get(j); + if(PassageContext.matches(".*\\([^\\(\\)]+,[^\\(\\)]+\\).*")) + { + PassageContext=PassageContext.replaceAll("\\([^\\(\\)]+,[^\\(\\)]+\\)", ""); + } + if(PassageContext.contains("(")) + { + Context = Context+PassageContext+" "; + } + } + FileAbb.write(Pmid+"\n"+Context+"\n\n"); + } + FileAbb.close(); + //Abb + File f = new File(FilenameAbb+".out"); + BufferedWriter fr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), "UTF-8")); + Runtime runtime = Runtime.getRuntime(); + String cmd ="./Ab3P "+FilenameAbb+".Abb "+FilenameAbb+".out"; + + String OS=System.getProperty("os.name").toLowerCase(); + if(OS.contains("windows")) + { + cmd ="java -jar bioadi.jar "+FilenameAbb; + } + else //if(OS.contains("nux")||OS.contains("nix")) + { + cmd ="./Ab3P "+FilenameAbb+" "+FilenameAbb+".out"; + //cmd ="java -jar bioadi.jar "+FilenameAbb+" > "+FilenameAbb+".out"; + } + + Process process = runtime.exec(cmd); + InputStream is = process.getInputStream(); + InputStreamReader isr = new InputStreamReader(is, "UTF-8"); + BufferedReader br = new BufferedReader(isr); + line=""; + while ( (line = br.readLine()) != null) + { + fr.write(line); + fr.newLine(); + fr.flush(); + } + is.close(); + isr.close(); + br.close(); + fr.close(); + //Abb output -> Hash + BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(FilenameAbb+".out"), "UTF-8")); + line=""; + String pmid=""; + while ((line = inputfile.readLine()) != null) + { + String patt="^ (.+)\\|(.+)\\|([0-9\\.]+)$"; + Pattern ptmp = Pattern.compile(patt); + Matcher mtmp = ptmp.matcher(line); + if(line.matches("^[0-9]+$")) + { + pmid=line; + } + if(mtmp.find()) + { + String SF = mtmp.group(1); + String LF = mtmp.group(2); + double weight= Double.parseDouble(mtmp.group(3)); + GNormPlus.Pmid2Abb_hash.put(pmid+"\t"+SF, "Abb:SF"); + GNormPlus.Pmid2Abb_hash.put(pmid+"\t"+LF, "Abb:LF"); + GNormPlus.PmidLF2Abb_lc_hash.put(pmid+"\t"+LF.toLowerCase(), SF.toLowerCase()); + GNormPlus.PmidAbb2LF_lc_hash.put(pmid+"\t"+SF.toLowerCase(), LF.toLowerCase()); + GNormPlus.PmidAbb2LF_hash.put(pmid+"\t"+SF, LF); + if(weight >= 0.9) + { + GNormPlus.PmidLF2Abb_hash.put(pmid+"\t"+LF, SF); + } + } + } + inputfile.close(); + } + + /* + * Feature Extraction + */ + public void FeatureExtraction(String FilenameData,String FilenameLoca,String TrainTest) throws XMLStreamException + { + try + { + /** output files */ + BufferedWriter FileLocation = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(FilenameLoca), "UTF-8")); // .location + BufferedWriter FileData = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(FilenameData), "UTF-8")); // .data + //NLP modules + SnowballStemmer stemmer = new englishStemmer(); + /** PMIDs : i */ + for (int i = 0; i < GNormPlus.BioCDocobj.PMIDs.size(); i++) + { + String Pmid = GNormPlus.BioCDocobj.PMIDs.get(i); + + /** Paragraphs : j */ + for (int j = 0; j < GNormPlus.BioCDocobj.PassageNames.get(i).size(); j++) + { + String PassageName= GNormPlus.BioCDocobj.PassageNames.get(i).get(j); // Passage name + int PassageOffset = GNormPlus.BioCDocobj.PassageOffsets.get(i).get(j); // Passage offset + String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(i).get(j); // Passage context + ArrayList Annotation = GNormPlus.BioCDocobj.Annotations.get(i).get(j); // Annotation + HashMap CTDGene_hash = new HashMap(); + HashMap FamilyName_hash = new HashMap(); + HashMap character_hash = new HashMap(); + HashMap Abbreviation_hash = new HashMap(); + String PassageContext_tmp=" "+PassageContext+" "; + + /** Abbreviation */ + HashMap Abb_sortebylength = new HashMap(); + ArrayList length_list = new ArrayList(); + int countn=0; + for (Object key : GNormPlus.Pmid2Abb_hash.keySet()) + { + String pmid2abb[]=key.toString().split("\t"); + if(Pmid.equals(pmid2abb[0])) + { + Abb_sortebylength.put(pmid2abb[1].length()*100+countn, pmid2abb[1]); + length_list.add(pmid2abb[1].length()*100+countn); + countn++; + } + } + Collections.sort(length_list); + for (int l=length_list.size()-1;l>=0;l--) + { + String AbbLF = Abb_sortebylength.get(length_list.get(l)); + AbbLF=AbbLF.replaceAll("([^A-Za-z0-9@ ])", "\\\\$1"); + AbbLF=AbbLF.replaceAll(" ", "\\[ \\]\\+"); + Pattern ptmp = Pattern.compile("^(.*[^A-Za-z0-9]+)("+AbbLF+")([^A-Za-z0-9]+.*)$"); + Matcher mtmp = ptmp.matcher(PassageContext_tmp); + while(mtmp.find()) + { + String str1=mtmp.group(1); + String str2=mtmp.group(2); + String str3=mtmp.group(3); + for(int m=str1.length();m<=(str1.length()+str2.length());m++) + { + Abbreviation_hash.put((m-1),GNormPlus.Pmid2Abb_hash.get(Pmid+"\t"+Abb_sortebylength.get(length_list.get(l)))); + } + String men=""; + for(int m=0;m locations = GNormPlus.PT_CTDGene.SearchMentionLocation(PassageContext,"CTDGene"); + for (int k = 0 ; k < locations.size() ; k++) + { + String anno[]=locations.get(k).split("\t"); + int start= Integer.parseInt(anno[0]) + PassageOffset; + int last= Integer.parseInt(anno[1]) + PassageOffset; + String mention = anno[2]; + String id = anno[3]; + + CTDGene_hash.put(start,"CTDGene_B"); + CTDGene_hash.put(last,"CTDGene_E"); + for(int s=start+1;s locations_Fname = GNormPlus.PT_FamilyName.SearchMentionLocation(PassageContext,"FamilyName"); + for (int k = 0 ; k < locations_Fname.size() ; k++) + { + String anno[]=locations_Fname.get(k).split("\t"); + int start= Integer.parseInt(anno[0]) + PassageOffset; + int last= Integer.parseInt(anno[1]) + PassageOffset; + String mention = anno[2]; + String id = anno[3]; + + if(!CTDGene_hash.containsKey(start)) + { + FamilyName_hash.put(start,"famplex_B"); + FamilyName_hash.put(last,"famplex_E"); + for(int s=start+1;stokens[p].length() && PassageContext_tmp.substring(tokens[p].length(),tokens[p].length()+1).equals(" ")) + { + WSF="WSF:Gap"; + } + if(p==0) + { + WSB="WSB:1st"; + } + else if(p==tokens.length-1) + { + WSF="WSF:last"; + } + + if(PassageContext_tmp.substring(0,tokens[p].length()).equals(tokens[p])) + { + if(tokens[p].length()>0) + { + /* + * .loca + */ + int start=Offset; + int last=Offset+tokens[p].length(); + String State=""; + if(!character_hash.containsKey(start) || !character_hash.containsKey(last)){} + else if(character_hash.get(start).matches(".*B$")) + { + State=character_hash.get(start); + } + else if(character_hash.get(last).matches(".*E$")) + { + State=character_hash.get(last); + } + else if(character_hash.get(start).matches(".*I$")) + { + State=character_hash.get(start); + } + + if((!tokens[p].equals("\t"))) + { + FileLocation.write(Pmid+"\t"+PassageName+"\t"+j+"\t"+tokens[p]+"\t"+(Offset+1)+"\t"+(Offset+tokens[p].length())+"\t"+State+"\n"); + } + + /* + * .data + */ + + //Abbreviation + String Abb_State="__nil__"; + if(!Abbreviation_hash.containsKey(start) || !Abbreviation_hash.containsKey(last)){Abb_State="__nil__";} + else if(Abbreviation_hash.containsKey(start)) + { + Abb_State=Abbreviation_hash.get(start); + } + + //CTDGene + start=PassageOffset+Offset; + last=PassageOffset+Offset+tokens[p].length(); + String CTDGene_State="__nil__"; + if(!CTDGene_hash.containsKey(start) || !CTDGene_hash.containsKey(last)){CTDGene_State="__nil__";} + else if(CTDGene_hash.get(start).matches(".*B$")) + { + CTDGene_State=CTDGene_hash.get(start); + } + else if(CTDGene_hash.get(last).matches(".*E$")) + { + CTDGene_State=CTDGene_hash.get(last); + } + else if(CTDGene_hash.get(start).matches(".*I$")) + { + CTDGene_State=CTDGene_hash.get(start); + } + + //FamilyName + if(CTDGene_State.equals("__nil__")) + { + start=PassageOffset+Offset; + last=PassageOffset+Offset+tokens[p].length(); + if(!FamilyName_hash.containsKey(start) || !FamilyName_hash.containsKey(last)){} + else if(FamilyName_hash.get(start).matches(".*B$")) + { + CTDGene_State=FamilyName_hash.get(start); + } + else if(FamilyName_hash.get(last).matches(".*E$")) + { + CTDGene_State=FamilyName_hash.get(last); + } + else if(FamilyName_hash.get(start).matches(".*I$")) + { + CTDGene_State=FamilyName_hash.get(start); + } + } + + //stemming + stemmer.setCurrent(tokens[p].toLowerCase()); + stemmer.stem(); + String stem=stemmer.getCurrent(); + + //Number of Numbers [0-9] + String Num_num=""; + String tmp=tokens[p]; + tmp=tmp.replaceAll("[^0-9]",""); + if(tmp.length()>3){Num_num="N:4+";}else{Num_num="N:"+ tmp.length();} + + //Number of Uppercase [A-Z] + String Num_Uc=""; + tmp=tokens[p]; + tmp=tmp.replaceAll("[^A-Z]",""); + if(tmp.length()>3){Num_Uc="U:4+";}else{Num_Uc="U:"+ tmp.length();} + + //Number of Lowercase [a-z] + String Num_lc=""; + tmp=tokens[p]; + tmp=tmp.replaceAll("[^a-z]",""); + if(tmp.length()>3){Num_lc="L:4+";}else{Num_lc="L:"+ tmp.length();} + + //Number of ALL char + String Num_All=""; + if(tokens[p].length()>3){Num_All="A:4+";}else{Num_All="A:"+ tokens[p].length();} + + //specific character (;:,.->+_) + String SpecificC="__nil__"; + if(tokens[p].equals(";") || tokens[p].equals(":") || tokens[p].equals(",") || tokens[p].equals(".") || tokens[p].equals("-") || tokens[p].equals(">") || tokens[p].equals("+") || tokens[p].equals("_")) + { + SpecificC="-SpecificC1-"; + } + else if(tokens[p].equals("(") || tokens[p].equals(")")) + { + SpecificC="-SpecificC2-"; + } + else if(tokens[p].equals("{") || tokens[p].equals("}")) + { + SpecificC="-SpecificC3-"; + } + else if(tokens[p].equals("[") || tokens[p].equals("]")) + { + SpecificC="-SpecificC4-"; + } + else if(tokens[p].equals("\\") || tokens[p].equals("/")) + { + SpecificC="-SpecificC5-"; + } + + //Chemical Prefix/Suffix + String ChemPreSuf="__nil__"; + if(tokens[p].matches(".*(yl|ylidyne|oyl|sulfonyl)")){ChemPreSuf="-CHEMinlineSuffix-";} + else if(tokens[p].matches("(meth|eth|prop|tetracos).*")){ChemPreSuf="-CHEMalkaneStem-";} + else if(tokens[p].matches("(di|tri|tetra).*")){ChemPreSuf="-CHEMsimpleMultiplier-";} + else if(tokens[p].matches("(benzen|pyridin|toluen).*")){ChemPreSuf="-CHEMtrivialRing-";} + else if(tokens[p].matches(".*(one|ol|carboxylic|amide|ate|acid|ium|ylium|ide|uide|iran|olan|inan|pyrid|acrid|amid|keten|formazan|fydrazin)(s|)")){ChemPreSuf="-CHEMsuffix-";} + + + //Mention Type + String MentionType="__nil__"; + /* + if($tmp eq "to" && $CTD_result_hash{$count_token-1} eq "CTD_gene" && $CTD_result_hash{$count_token+1} eq "CTD_gene"){$CTD_result_hash{$count_token}="CTD_gene";} + if($tmp=~/^(or|and|,)$/ && $CTD_result_hash{$count_token-1} eq "CTD_gene" && $CTD_result_hash{$count_token+1} eq "CTD_gene"){$MentionType="-Type_GeneConjunction-";} + elsif($tmp=~/^(or|and|,)$/ && $last_token=~/^(or|and|,)$/ && $CTD_result_hash{$count_token-2} eq "CTD_gene" && $CTD_result_hash{$count_token+1} eq "CTD_gene"){$MentionType="-Type_GeneConjunction-";} + elsif($tmp=~/^(or|and|,)$/ && $next_token=~/^(or|and|,)$/ && $CTD_result_hash{$count_token-1} eq "CTD_gene" && $CTD_result_hash{$count_token+2} eq "CTD_gene"){$MentionType="-Type_GeneConjunction-";} + */ + if(tokens[p].matches("(ytochrome|cytochrome)")){MentionType="-Type_cytochrome-";} + else if(tokens[p].matches(".*target") ){MentionType="-Type_target-";} + else if(tokens[p].matches(".*(irradiation|hybrid|fusion|experiment|gst|est|gap|antigen)") ){MentionType="-Type_ExperimentNoun-";} + else if(tokens[p].matches(".*(disease|disorder|dystrophy|deficiency|syndrome|dysgenesis|cancer|injury|neoplasm|diabetes|diabete)") ){MentionType="-Type_Disease-";} + else if(tokens[p].matches(".*(motif|domain|omain|binding|site|region|sequence|frameshift|finger|box).*") ){MentionType="-Type_DomainMotif-";} + else if(tokens[p].equals("-") && (p0 && tokens[p-1].matches("^[0-9]+$")) ) ){MentionType="-Type_ChromosomeStrain-";} + else if(tokens[p].matches(".*(related|regulated|associated|correlated|reactive).*")){MentionType="-Type_relation-";} + else if(tokens[p].toLowerCase().matches(".*(polymorphism|mutation|deletion|insertion|duplication|genotype|genotypes).*") ){MentionType="-Type_VariationTerms-";} + else if(tokens[p].matches(".*(oxidase|transferase|transferases|kinase|kinese|subunit|unit|receptor|adrenoceptor|transporter|regulator|transcription|antigen|protein|gene|factor|member|molecule|channel|deaminase|spectrin).*") ){MentionType="-Type_suffix-";} + else if(tokens[p].matches("[\\(\\-\\_]") && (p=1){ prefix=tmp.substring(0, 1);}else{prefix="__nil__";} + if(tmp.length()>=2){ prefix=prefix+" "+tmp.substring(0, 2);}else{prefix=prefix+" __nil__";} + if(tmp.length()>=3){ prefix=prefix+" "+tmp.substring(0, 3);}else{prefix=prefix+" __nil__";} + if(tmp.length()>=4){ prefix=prefix+" "+tmp.substring(0, 4);}else{prefix=prefix+" __nil__";} + if(tmp.length()>=5){ prefix=prefix+" "+tmp.substring(0, 5);}else{prefix=prefix+" __nil__";} + + + //suffix + String suffix=""; + tmp=tokens[p]; + if(tmp.length()>=1){ suffix=tmp.substring(tmp.length()-1, tmp.length());}else{suffix="__nil__";} + if(tmp.length()>=2){ suffix=suffix+" "+tmp.substring(tmp.length()-2, tmp.length());}else{suffix=suffix+" __nil__";} + if(tmp.length()>=3){ suffix=suffix+" "+tmp.substring(tmp.length()-3, tmp.length());}else{suffix=suffix+" __nil__";} + if(tmp.length()>=4){ suffix=suffix+" "+tmp.substring(tmp.length()-4, tmp.length());}else{suffix=suffix+" __nil__";} + if(tmp.length()>=5){ suffix=suffix+" "+tmp.substring(tmp.length()-5, tmp.length());}else{suffix=suffix+" __nil__";} + + if(State.equals("")) + { + State="O"; + } + + if((!tokens[p].equals("\t"))) + { + if(TrainTest.equals("Train")) + { + FileData.write(tokens[p]+" "+stem+" "+WSB+" "+WSF+" "+Num_num+" "+Num_Uc+" "+Num_lc+" "+Num_All+" "+SpecificC+" "+ChemPreSuf+" "+MentionType+" "+ProteinSym+" "+prefix+" "+suffix+" "+CTDGene_State+" "+Abb_State+" "+State+"\n"); + } + else + { + FileData.write(tokens[p]+" "+stem+" "+WSB+" "+WSF+" "+Num_num+" "+Num_Uc+" "+Num_lc+" "+Num_All+" "+SpecificC+" "+ChemPreSuf+" "+MentionType+" "+ProteinSym+" "+prefix+" "+suffix+" "+CTDGene_State+" "+Abb_State+"\n"); + } + } + PassageContext_tmp=PassageContext_tmp.substring(tokens[p].length()); // remove the token for the context + Offset=Offset+tokens[p].length(); + } + } + } + if(tokens.length>0) + { + FileLocation.write("\n"); + FileData.write("\n"); + } + } + } + FileLocation.close(); + FileData.close(); + } + catch(IOException e1){ System.out.println("[MR]: Input file is not exist.");} + } + /* + * Testing by CRF++ + */ + public void CRF_test(String model, String FilenameData, String FilenameOutput) throws IOException + { + File f = new File(FilenameOutput); + BufferedWriter fr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), "UTF-8")); + + Runtime runtime = Runtime.getRuntime(); + + String OS=System.getProperty("os.name").toLowerCase(); + + String cmd="./CRF/crf_test -m "+model+" -o "+FilenameOutput+" "+FilenameData; + if(OS.contains("windows")) + { + cmd ="CRF/crf_test -m "+model+" -o "+FilenameOutput+" "+FilenameData; + } + else //if(OS.contains("nux")||OS.contains("nix")) + { + cmd ="./CRF/crf_test -m "+model+" -o "+FilenameOutput+" "+FilenameData; + } + + try { + Process process = runtime.exec(cmd); + InputStream is = process.getInputStream(); + InputStreamReader isr = new InputStreamReader(is, "UTF-8"); + BufferedReader br = new BufferedReader(isr); + String line=""; + while ( (line = br.readLine()) != null) + { + fr.write(line); + fr.newLine(); + fr.flush(); + } + is.close(); + isr.close(); + br.close(); + fr.close(); + } + catch (IOException e) { + System.out.println(e); + runtime.exit(0); + } + } + + public void CRF_test(String model,String FilenameData,String FilenameOutput,String top3) throws IOException + { + File f = new File(FilenameOutput); + BufferedWriter fr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), "UTF-8")); + + Runtime runtime = Runtime.getRuntime(); + + String OS=System.getProperty("os.name").toLowerCase(); + + String cmd="./CRF/crf_test -n 3 -m "+model+" -o "+FilenameOutput+" "+FilenameData; + if(OS.contains("windows")) + { + cmd ="CRF/crf_test -n 3 -m "+model+" -o "+FilenameOutput+" "+FilenameData; + } + else //if(OS.contains("nux")||OS.contains("nix")) + { + cmd ="./CRF/crf_test -n 3 -m "+model+" -o "+FilenameOutput+" "+FilenameData; + } + + try { + Process process = runtime.exec(cmd); + InputStream is = process.getInputStream(); + InputStreamReader isr = new InputStreamReader(is, "UTF-8"); + BufferedReader br = new BufferedReader(isr); + String line=""; + while ( (line = br.readLine()) != null) + { + fr.write(line); + fr.newLine(); + fr.flush(); + } + is.close(); + isr.close(); + br.close(); + fr.close(); + } + catch (IOException e) { + System.out.println(e); + runtime.exit(0); + } + } + + /* + * Learning model by CRF++ + */ + public void CRF_learn(String model, String FilenameData) throws IOException + { + Runtime runtime = Runtime.getRuntime(); + + Process process = null; + String line = null; + InputStream is = null; + InputStreamReader isr = null; + BufferedReader br = null; + + String OS=System.getProperty("os.name").toLowerCase(); + + String cmd="./CRF/crf_learn -f 3 -c 4.0 CRF/template_UB "+FilenameData+" "+model; + if(OS.contains("windows")) + { + cmd ="CRF/crf_learn -f 3 -c 4.0 CRF/template_UB "+FilenameData+" "+model; + } + else //if(OS.contains("nux")||OS.contains("nix")) + { + cmd ="./CRF/crf_learn -f 3 -c 4.0 CRF/template_UB "+FilenameData+" "+model; + } + + try { + process = runtime.exec(cmd); + is = process.getInputStream(); + isr = new InputStreamReader(is, "UTF-8"); + br = new BufferedReader(isr); + while ( (line = br.readLine()) != null) + { + System.out.println(line); + System.out.flush(); + } + is.close(); + isr.close(); + br.close(); + } + catch (IOException e) { + System.out.println(e); + runtime.exit(0); + } + } + + public void ReadCRFresult(String Filename,String FilenameLoca,String FilenameOutput,String FilenameBioC) throws XMLStreamException, IOException + { + /** load CRF output */ + ArrayList outputArr = new ArrayList(); + BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(FilenameOutput), "UTF-8")); + String line; + while ((line = inputfile.readLine()) != null) + { + outputArr.add(line); + } + inputfile.close(); + + /** load location */ + ArrayList locationArr = new ArrayList(); + inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(FilenameLoca), "UTF-8")); + while ((line = inputfile.readLine()) != null) + { + locationArr.add(line); + } + inputfile.close(); + + /** output -> mentions */ + String pmid_last=""; + String paragraph_num_last=""; + String pmid=""; + String paragraph=""; + String paragraph_num=""; + Pattern pat_B = Pattern.compile("((FamilyName|DomainMotif|Gene)_[B])$"); + Pattern pat_IE = Pattern.compile("((FamilyName|DomainMotif|Gene)_[IE])$"); + ArrayList> AnnotationInPMID = new ArrayList(); // array of Annotations in the PMIDs + ArrayList AnnotationInPassage= new ArrayList(); // array of Annotations in the Passage + GNormPlus.BioCDocobj.Annotations = new ArrayList(); + int countPMID=0; + int countPassage=0; + /** outputArr */ + for(int i=0;i3) + { + pmid=locationRow[0]; + paragraph=locationRow[1]; + paragraph_num=locationRow[2]; + } + + if( (!paragraph_num_last.equals("")) && (!paragraph_num.equals(paragraph_num_last)) ) + { + AnnotationInPMID.add(AnnotationInPassage); + AnnotationInPassage = new ArrayList(); + countPassage++; + } + if( (!pmid_last.equals("")) && (!pmid.equals(pmid_last)) ) + { + GNormPlus.BioCDocobj.Annotations.add(AnnotationInPMID); + AnnotationInPMID = new ArrayList(); + countPMID++; + countPassage=0; + } + + boolean F = false; //Flag of Finding + if(locationRow.length>2) + { + Matcher mat = pat_B.matcher(outputsRow[outputsRow.length-1]); // last column : Status + while(mat.find() && locationRow.length==6) + { + MentionType=mat.group(2); + pmid=locationRow[0]; + paragraph_num=locationRow[2]; + int start_tmp=Integer.parseInt(locationRow[4])-1; + int last_tmp=Integer.parseInt(locationRow[5]); + if(start_tmplast){last=last_tmp;} + i++; + F = true; + if(locationArr.get(i).length()>0) + { + outputsRow=outputArr.get(i).split("\\t"); + locationRow=locationArr.get(i).split("\\t"); + mat = pat_IE.matcher(outputsRow[outputsRow.length-1]); + } + else + { + break; + } + } + } + + if(F == true) + { + String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(countPMID).get(countPassage); // Passage context + String Mention = PassageContext.substring(start, last); + String Mention_nospace = Mention.replaceAll("[\\W\\-\\_]", ""); + if(Mention.toLowerCase().matches("(figure|tables|fig|tab|exp\\. [0-9]+).*")){} + else if(Mention.matches("[A-Z][A-Z]s")){} + else if(Mention.matches(".*\\|.*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\;\\,\\'\\/\\\\].*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\(].*") && !Mention.matches(".*[\\)].*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\[].*") && !Mention.matches(".*[\\]].*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\)].*") && !Mention.matches(".*[\\(].*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\]].*") && !Mention.matches(".*[\\[].*")){} + else + { + AnnotationInPassage.add(start+"\t"+last+"\t"+Mention+"\t"+MentionType); + } + i--; + } + + paragraph_num_last=paragraph_num; + pmid_last=pmid; + }// outputArr1 + AnnotationInPMID.add(AnnotationInPassage); + GNormPlus.BioCDocobj.Annotations.add(AnnotationInPMID); + + //GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,false,false); //save in BioC file + } + + public void ReadCRFresult(String Filename,String FilenameLoca,String FilenameOutput,String FilenameBioC,double threshold,double threshold_GeneType) throws XMLStreamException, IOException + { + /** load CRF output */ + ArrayList outputArr1 = new ArrayList(); + ArrayList outputArr2 = new ArrayList(); + ArrayList outputArr3 = new ArrayList(); + ArrayList outputArr1_score = new ArrayList(); + ArrayList outputArr2_score = new ArrayList(); + ArrayList outputArr3_score = new ArrayList(); + BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(FilenameOutput), "UTF-8")); + String line; + int rank=0; + String score=""; + Pattern pat_Rank = Pattern.compile("^# ([0-2]) ([0-9\\.]+)$"); + while ((line = inputfile.readLine()) != null) + { + Matcher mat = pat_Rank.matcher(line); // last column : Status + if(mat.find()) + { + rank = Integer.parseInt(mat.group(1)); + score = mat.group(2); + } + else if(rank == 0) + { + outputArr1.add(line); + outputArr1_score.add(score); + } + else if(rank == 1) + { + outputArr2.add(line); + outputArr2_score.add(score); + } + else if(rank == 2) + { + outputArr3.add(line); + outputArr3_score.add(score); + } + } + inputfile.close(); + + /** load location */ + ArrayList locationArr = new ArrayList(); + inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(FilenameLoca), "UTF-8")); + while ((line = inputfile.readLine()) != null) + { + locationArr.add(line); + } + inputfile.close(); + + /** output -> mentions */ + String pmid_last=""; + String paragraph_num_last=""; + String pmid=""; + String paragraph=""; + String paragraph_num=""; + Pattern pat_B = Pattern.compile("((FamilyName|DomainMotif|Gene)_[B])$"); + Pattern pat_IE = Pattern.compile("((FamilyName|DomainMotif|Gene)_[IE])$"); + ArrayList> AnnotationInPMID = new ArrayList(); // array of Annotations in the PMIDs + ArrayList AnnotationInPassage= new ArrayList(); // array of Annotations in the Passage + GNormPlus.BioCDocobj.Annotations = new ArrayList(); + int countPMID=0; + int countPassage=0; + /** outputArr1 */ + int size_Arr=outputArr1.size(); + if(locationArr.size()3) + { + pmid=locationRow[0]; + paragraph=locationRow[1]; + paragraph_num=locationRow[2]; + } + + boolean F = false; //Flag of Finding + if(outputsRow.length>=1) + { + Matcher mat = pat_B.matcher(outputsRow[outputsRow.length-1]); // last column : Status + while(mat.find() && locationRow.length==6) + { + MentionType=mat.group(2); + pmid=locationRow[0]; + int start_tmp=Integer.parseInt(locationRow[4])-1; + int last_tmp=Integer.parseInt(locationRow[5]); + if(start_tmplast){last=last_tmp;} + i++; + outputsRow=outputArr1.get(i).split("\\t"); + locationRow=locationArr.get(i).split("\\t"); + mat = pat_IE.matcher(outputsRow[outputsRow.length-1]); + F = true; + } + } + + if( (!paragraph_num_last.equals("")) && (!paragraph_num.equals(paragraph_num_last)) ) // paragraph change + { + AnnotationInPMID.add(AnnotationInPassage); + AnnotationInPassage = new ArrayList(); + countPassage++; + } + + if( !pmid.equals(pmid_last) && paragraph_num.equals("0") && paragraph_num_last.equals("0") ) // pmid change (special case : the article only has one paragrpah) + { + AnnotationInPMID.add(AnnotationInPassage); + AnnotationInPassage = new ArrayList(); + GNormPlus.BioCDocobj.Annotations.add(AnnotationInPMID); + AnnotationInPMID = new ArrayList(); + countPMID++; + countPassage=0; + } + else if( (!pmid_last.equals("")) && (!pmid.equals(pmid_last)) ) // pmid change + { + GNormPlus.BioCDocobj.Annotations.add(AnnotationInPMID); + AnnotationInPMID = new ArrayList(); + countPMID++; + countPassage=0; + } + + if(F == true) + { + if(GNormPlus.BioCDocobj.PassageContexts.size()>countPMID && GNormPlus.BioCDocobj.PassageContexts.get(countPMID).size()>countPassage && GNormPlus.BioCDocobj.PassageContexts.get(countPMID).get(countPassage).length()>=last && (last-start)<1000) + { + String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(countPMID).get(countPassage); // Passage context + String Mention = PassageContext.substring(start, last); + String Mention_nospace = Mention.replaceAll("[\\W\\-\\_]", ""); + if(Mention.toLowerCase().matches("(figure|tables|fig|tab|exp\\. [0-9]+).*")){} + else if(Mention.matches("[A-Z][A-Z]s")){} + else if(Mention.matches(".*\\|.*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\;\\,\\'\\/\\\\].*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\(].*") && !Mention.matches(".*[\\)].*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\[].*") && !Mention.matches(".*[\\]].*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\)].*") && !Mention.matches(".*[\\(].*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\]].*") && !Mention.matches(".*[\\[].*")){} + else if((GNormPlus.Abb2Longformtok_hash.containsKey(Mention_nospace.toLowerCase())) && (PassageContext.toLowerCase().matches(".*[\\W\\-\\-]("+GNormPlus.Abb2Longformtok_hash.get(Mention_nospace.toLowerCase())+")[\\W\\-\\-].*"))) + { + //System.out.println(Mention_nospace.toLowerCase()+"\t"+GNormPlus.Abb2Longformtok_hash.get(Mention_nospace.toLowerCase())); + } + else + { + AnnotationInPassage.add(start+"\t"+last+"\t"+Mention+"\t"+MentionType); + } + } + i--; + } + paragraph_num_last=paragraph_num; + pmid_last=pmid; + }// outputArr1 + AnnotationInPMID.add(AnnotationInPassage); + GNormPlus.BioCDocobj.Annotations.add(AnnotationInPMID); + + /** outputArr2 */ + pmid_last=""; + paragraph_num_last=""; + pmid=""; + paragraph=""; + paragraph_num=""; + countPMID=0; + countPassage=0; + size_Arr=outputArr2.size(); + if(locationArr.size()2) + { + pmid=locationRow[0]; + paragraph=locationRow[1]; + paragraph_num=locationRow[2]; + } + + boolean F = false; //Flag of Finding + if(outputsRow.length>=1) + { + Matcher mat = pat_B.matcher(outputsRow[outputsRow.length-1]); // last column : Status + while(mat.find() && locationRow.length==6) + { + MentionType=mat.group(2); + pmid=locationRow[0]; + int start_tmp=Integer.parseInt(locationRow[4])-1; + int last_tmp=Integer.parseInt(locationRow[5]); + if(start_tmplast){last=last_tmp;} + i++; + outputsRow=outputArr2.get(i).split("\\t"); + locationRow=locationArr.get(i).split("\\t"); + mat = pat_IE.matcher(outputsRow[outputsRow.length-1]); + F = true; + } + } + + if( (!paragraph_num_last.equals("")) && (!paragraph_num.equals(paragraph_num_last)) ) // paragraph change + { + countPassage++; + } + + if( !pmid.equals(pmid_last) && paragraph_num.equals("0") && paragraph_num_last.equals("0") ) // pmid change (special case : the article only has one paragrpah) + { + countPMID++; + countPassage=0; + } + else if( (!pmid_last.equals("")) && (!pmid.equals(pmid_last)) ) // pmid change + { + countPMID++; + countPassage=0; + } + + if(F == true) + { + if(GNormPlus.BioCDocobj.PassageContexts.size()>countPMID && GNormPlus.BioCDocobj.PassageContexts.get(countPMID).size()>countPassage && GNormPlus.BioCDocobj.PassageContexts.get(countPMID).get(countPassage).length()>=last && (last-start)<1000) + { + String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(countPMID).get(countPassage); // Passage context + String Mention = PassageContext.substring(start, last); + String Mention_nospace = Mention.replaceAll("[\\W\\-\\_]", ""); + if(Mention.toLowerCase().matches("(figure|tables|fig|tab|exp\\. [0-9]+).*")){} + else if(Mention.matches("[A-Z][A-Z]s")){} + else if(Mention.matches(".*\\|.*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\;\\,\\'\\/\\\\].*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\(].*") && !Mention.matches(".*[\\)].*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\[].*") && !Mention.matches(".*[\\]].*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\)].*") && !Mention.matches(".*[\\(].*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\]].*") && !Mention.matches(".*[\\[].*")){} + else if((GNormPlus.Abb2Longformtok_hash.containsKey(Mention_nospace.toLowerCase())) && (PassageContext.toLowerCase().matches(".*[\\W\\-\\-]("+GNormPlus.Abb2Longformtok_hash.get(Mention_nospace.toLowerCase())+")[\\W\\-\\-].*"))) + { + //System.out.println(Mention_nospace.toLowerCase()+"\t"+GNormPlus.Abb2Longformtok_hash.get(Mention_nospace.toLowerCase())); + } + else if(Double.parseDouble(outputArr2_score.get(i))>threshold) + { + boolean overlap=false; + for(int j=0;jthreshold_GeneType && GNormPlus.BioCDocobj.Annotations.get(countPMID).get(countPassage).get(j).matches(start+"\t"+last+"\t"+Mention_tmp+"\t(FamilyName|DomainMotif)") ) + { + GNormPlus.BioCDocobj.Annotations.get(countPMID).get(countPassage).set(j, start+"\t"+last+"\t"+Mention+"\t"+MentionType); + } + else if( (start>=startj && startstartj && last<=lastj) ) + { + overlap=true; + } + } + if(overlap == false) + { + GNormPlus.BioCDocobj.Annotations.get(countPMID).get(countPassage).add(start+"\t"+last+"\t"+Mention+"\t"+MentionType); + } + } + } + i--; + } + + paragraph_num_last=paragraph_num; + pmid_last=pmid; + }// outputArr2 + + /** outputArr3 */ + pmid_last=""; + paragraph_num_last=""; + pmid=""; + paragraph=""; + paragraph_num=""; + countPMID=0; + countPassage=0; + size_Arr=outputArr3.size(); + if(locationArr.size()2) + { + pmid=locationRow[0]; + paragraph=locationRow[1]; + paragraph_num=locationRow[2]; + } + + boolean F = false; //Flag of Finding + if(outputsRow.length>=1) + { + Matcher mat = pat_B.matcher(outputsRow[outputsRow.length-1]); // last column : Status + while(mat.find() && locationRow.length==6) + { + MentionType=mat.group(2); + pmid=locationRow[0]; + paragraph_num=locationRow[2]; + int start_tmp=Integer.parseInt(locationRow[4])-1; + int last_tmp=Integer.parseInt(locationRow[5]); + if(start_tmplast){last=last_tmp;} + i++; + outputsRow=outputArr3.get(i).split("\\t"); + locationRow=locationArr.get(i).split("\\t"); + mat = pat_IE.matcher(outputsRow[outputsRow.length-1]); + F = true; + } + } + + if( (!paragraph_num_last.equals("")) && (!paragraph_num.equals(paragraph_num_last)) ) // paragraph change + { + countPassage++; + } + + if( !pmid.equals(pmid_last) && paragraph_num.equals("0") && paragraph_num_last.equals("0") ) // pmid change (special case : the article only has one paragrpah) + { + countPMID++; + countPassage=0; + } + else if( (!pmid_last.equals("")) && (!pmid.equals(pmid_last)) ) // pmid change + { + countPMID++; + countPassage=0; + } + + if(F == true) + { + if(GNormPlus.BioCDocobj.PassageContexts.size()>countPMID && GNormPlus.BioCDocobj.PassageContexts.get(countPMID).size()>countPassage && GNormPlus.BioCDocobj.PassageContexts.get(countPMID).get(countPassage).length()>=last && (last-start)<1000) + { + String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(countPMID).get(countPassage); // Passage context + String Mention = PassageContext.substring(start, last); + String Mention_nospace = Mention.replaceAll("[\\W\\-\\_]", ""); + if(Mention.toLowerCase().matches("(figure|tables|fig|tab|exp\\. [0-9]+).*")){} + else if(Mention.matches("[A-Z][A-Z]s")){} + else if(Mention.matches(".*\\|.*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\;\\,\\'\\/\\\\].*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\(].*") && !Mention.matches(".*[\\)].*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\[].*") && !Mention.matches(".*[\\]].*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\)].*") && !Mention.matches(".*[\\(].*")){} + else if(Mention_nospace.length()<=3 && Mention.matches(".*[0-9].*") && Mention.matches(".*[\\]].*") && !Mention.matches(".*[\\[].*")){} + else if((GNormPlus.Abb2Longformtok_hash.containsKey(Mention_nospace.toLowerCase())) && (PassageContext.toLowerCase().matches(".*[\\W\\-\\-]("+GNormPlus.Abb2Longformtok_hash.get(Mention_nospace.toLowerCase())+")[\\W\\-\\-].*"))) + { + //System.out.println(Mention_nospace.toLowerCase()+"\t"+GNormPlus.Abb2Longformtok_hash.get(Mention_nospace.toLowerCase())); + } + else if(Double.parseDouble(outputArr3_score.get(i))>threshold) + { + boolean overlap=false; + for(int j=0;jthreshold_GeneType && GNormPlus.BioCDocobj.Annotations.get(countPMID).get(countPassage).get(j).matches(start+"\t"+last+"\t"+Mention_tmp+"\t(FamilyName|DomainMotif)") ) + { + GNormPlus.BioCDocobj.Annotations.get(countPMID).get(countPassage).set(j, start+"\t"+last+"\t"+Mention+"\t"+MentionType); + } + else if( (start>=startj && startstartj && last<=lastj) ) + { + overlap=true; + } + } + if(overlap == false) + { + GNormPlus.BioCDocobj.Annotations.get(countPMID).get(countPassage).add(start+"\t"+last+"\t"+Mention+"\t"+MentionType); + } + } + } + i--; + } + + paragraph_num_last=paragraph_num; + pmid_last=pmid; + }// outputArr3 + + //GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,false,false); //save in BioC file + } + + public void PostProcessing(String Filename,String FilenameBioC) throws XMLStreamException, IOException + { + /** Develop Cell | FamilyName | DomainMotif lists */ + String Disease_Suffix="disease|diseases|syndrome|syndromes|tumor|tumour|deficiency|dysgenesis|atrophy|frame|dystrophy"; + String Cell_Suffix="cell|cells"; + String FamilyName_Suffix="disease|diseases|syndrome|syndromes|tumor|tumour|deficiency|dysgenesis|atrophy|frame|dystrophy|frame|factors|family|families|superfamily|superfamilies|subfamily|subfamilies|complex|genes|proteins"; + String DomainMotif_Suffix="domain|motif|domains|motifs|sequences"; + String Strain_Suffix="alpha|beta|gamma|kappa|theta|delta|[A-Ga-g0-9]"; + ArrayList Translate2Family = new ArrayList(); + + for(int i=0;i Mention2Type_Hash = new HashMap(); // for substring detection - Extract all mentions in the target PMID : MentionList + ArrayList GeneMentionPattern = new ArrayList(); // pattern match to extend Gene + HashMap MentionType2Num = new HashMap(); // for frequency calculation + if(GNormPlus.BioCDocobj.PMIDs.size()>=i) + { + String pmid=GNormPlus.BioCDocobj.PMIDs.get(i); + for(int j=0;j RemoveList = new ArrayList(); + for(int k=0;k Family name (TIF & TIF1) */ + boolean SubSt=false; + /* + // GDNFb -> GDNF (not work on 12682085_J_Cell_Biol_2003.xml) + for (String men : Mention2Type_Hash.keySet()) + { + if((!men.equals(mention.toLowerCase())) && men.matches(mention_tmp+"[\\W\\-\\_]*("+Strain_Suffix+")")) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mention+"\tFamilyName"); + if(GNormPlus.PmidLF2Abb_lc_hash.containsKey(GNormPlus.BioCDocobj.PMIDs.get(i)+"\t"+mention.toLowerCase())) + { + Translate2Family.add(GNormPlus.PmidLF2Abb_lc_hash.get(GNormPlus.BioCDocobj.PMIDs.get(i)+"\t"+mention.toLowerCase())); + } + else if(GNormPlus.PmidAbb2LF_lc_hash.containsKey(GNormPlus.BioCDocobj.PMIDs.get(i)+"\t"+mention.toLowerCase())) + { + Translate2Family.add(GNormPlus.PmidAbb2LF_lc_hash.get(GNormPlus.BioCDocobj.PMIDs.get(i)+"\t"+mention.toLowerCase())); + } + SubSt=true; + break; + } + } + */ + if(SubSt == false) + { + int BoundaryLen=15; + if(GNormPlus.BioCDocobj.PassageContexts.get(i).get(j).length() Family/Domain/Cell */ + if( mention.toLowerCase().matches(".*("+Cell_Suffix+")") || SurroundingString.matches("("+Cell_Suffix+")") ) + { + type="Cell"; + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mention+"\t"+type); + } + else if( mention.toLowerCase().matches(".*("+FamilyName_Suffix+")") || SurroundingString.matches("("+FamilyName_Suffix+")") ) + { + type="FamilyName"; + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mention+"\t"+type); + } + else if( mention.toLowerCase().matches(".*("+DomainMotif_Suffix+")")|| SurroundingString.matches("("+DomainMotif_Suffix+")") ) + { + type="DomainMotif"; + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mention+"\t"+type); + } + else if(!type.equals("Gene")) + { + /* 3. Check (Family+Domain+Cell)/All rate (threshold = 0.5) - Family/Domain/Cell -> Gene */ + double Num_FDC=0; + double Num_Gene=0; + if(MentionType2Num.containsKey(mention.toLowerCase()+"\tFamilyName")) + { + Num_FDC = Num_FDC + MentionType2Num.get(mention.toLowerCase()+"\tFamilyName"); + } + if(MentionType2Num.containsKey(mention.toLowerCase()+"\tDomainMotif")) + { + Num_FDC = Num_FDC + MentionType2Num.get(mention.toLowerCase()+"\tDomainMotif"); + } + if(MentionType2Num.containsKey(mention.toLowerCase()+"\tCell")) + { + Num_FDC = Num_FDC + MentionType2Num.get(mention.toLowerCase()+"\tCell"); + } + if(MentionType2Num.containsKey(mention.toLowerCase()+"\tGene")) + { + Num_Gene = Num_Gene + MentionType2Num.get(mention.toLowerCase()+"\tGene"); + } + if(Num_Gene/(Num_FDC+Num_Gene)>=0.5) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mention+"\tGene"); + } + + /* 4. Extend Genes to Family/Domain mentions by pattern match - Family/Domain/Cell -> Gene */ + for(int p=0;p Abb.type + * - Abb only : Abb.type -> LF.type + * - LF only : LF.type -> Abb.type + */ + String lc_ment=mention.toLowerCase(); + if(GNormPlus.PmidAbb2LF_lc_hash.containsKey(pmid+"\t"+lc_ment)) //the target mention is abbreviation + { + //Infer Abbreviation by Long form + if(GNormPlus.PmidAbb2LF_lc_hash.get(pmid+"\t"+lc_ment).matches(".*("+Disease_Suffix+")")) + { + //remove the mention (Abb), because the LF is a disease + } + else if(GNormPlus.PmidAbb2LF_lc_hash.get(pmid+"\t"+lc_ment).matches(".*("+Cell_Suffix+")")) + { + //GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Anno[0]+"\t"+Anno[1]+"\tCell"); + } + else if(GNormPlus.PmidAbb2LF_lc_hash.get(pmid+"\t"+lc_ment).matches(".*("+FamilyName_Suffix+")") && !lc_ment.matches(".+[a-z][0-9][a-z]")) //AtRPA1a in pmid:19153602 + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mention+"\tFamilyName"); + } + else if(GNormPlus.PmidAbb2LF_lc_hash.get(pmid+"\t"+lc_ment).matches(".*("+DomainMotif_Suffix+")")) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mention+"\tDomainMotif"); + } + else + { + if(Mention2Type_Hash.containsKey(GNormPlus.PmidAbb2LF_lc_hash.get(pmid+"\t"+lc_ment)) + && Mention2Type_Hash.get(GNormPlus.PmidAbb2LF_lc_hash.get(pmid+"\t"+lc_ment)).equals("Gene") + && !(type.equals("Gene")) + ) // if Long Form is recognized as a Gene, and Abb is recognized as not a Gene + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, start+"\t"+last+"\t"+mention+"\tGene"); + } + } + } + } //if(Remov == true) + } + } + + for(int j=0;j GeneMentionPattern = new ArrayList(); // pattern match to extend Gene + HashMap GeneMentions = new HashMap(); // Extending Gene mentions + HashMap GeneMentionLocationGNR = new HashMap(); // Extending Gene mentions + for(int j=0;ji && GNormPlus.BioCDocobj.PassageContexts.get(i).size()>j) + { + String PassageContexts = " " + GNormPlus.BioCDocobj.PassageContexts.get(i).get(j) + " "; + String PassageContexts_tmp = PassageContexts.toLowerCase(); + for(String gm : GeneMentions.keySet()) + { + String type=GeneMentions.get(gm); + if(type.equals("Gene")) + { + gm = gm.replaceAll("([\\W\\-\\_])", "\\\\$1"); + gm=gm.replaceAll("[0-9]", "\\[0\\-9\\]"); + gm=gm.replaceAll("(alpha|beta|gamma|theta|zeta|delta)", "(alpha\\|beta\\|gamma\\|theta\\|zeta\\|delta)"); + gm=gm.replaceAll("\\-[a-z]$", "\\-\\[a\\-z\\]"); + Pattern ptmp = Pattern.compile("^(.*[\\W\\-\\_])("+gm+")([\\W\\-\\_].*)$"); + Matcher mtmp = ptmp.matcher(PassageContexts_tmp); + while(mtmp.find()) + { + String pre = mtmp.group(1); + String gmtmp = mtmp.group(2); + String post = mtmp.group(3); + + int start = pre.length()-1; + int last = start+gmtmp.length(); + if(PassageContexts.length()>last) + { + String mention = PassageContexts.substring(start+1,last+1); + if(!GeneMentionLocationGNR.containsKey(j+"\t"+start) && !GeneMentionLocationGNR.containsKey(j+"\t"+last)) + { + if(GNormPlus.BioCDocobj.Annotations.get(i).get(j).contains(start+"\t"+last+"\t"+mention+"\tFamilyName")) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).remove(start+"\t"+last+"\t"+mention+"\tFamilyName"); + } + else if(GNormPlus.BioCDocobj.Annotations.get(i).get(j).contains(start+"\t"+last+"\t"+mention+"\tDomainMotif")) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).remove(start+"\t"+last+"\t"+mention+"\tDomainMotif"); + } + GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\tGene"); + } + gmtmp = gmtmp.replaceAll(".", "X"); + PassageContexts_tmp=pre+""+gmtmp+""+post; + mtmp = ptmp.matcher(PassageContexts_tmp); + } + } + } + } + } + } + + //Extend to all family mentions + for(int j=0;ji && GNormPlus.BioCDocobj.PassageContexts.get(i).size()>j) + { + String PassageContexts = " " + GNormPlus.BioCDocobj.PassageContexts.get(i).get(j) + " "; + String PassageContexts_tmp = PassageContexts.toLowerCase(); + for(String gm : GeneMentions.keySet()) + { + String type=GeneMentions.get(gm); + if(type.matches("(FamilyName|DomainMotif)")) + { + gm = gm.replaceAll("([\\W\\-\\_])", "\\\\$1"); + gm=gm.replaceAll("s$", "(s\\|)"); + Pattern ptmp = Pattern.compile("^(.*[\\W\\-\\_])("+gm+")([\\W\\-\\_].*)$"); + Matcher mtmp = ptmp.matcher(PassageContexts_tmp); + while(mtmp.find()) + { + String pre = mtmp.group(1); + String gmtmp = mtmp.group(2); + String post = mtmp.group(3); + + int start = pre.length()-1; + int last = start+gmtmp.length(); + if(PassageContexts.length()>last) + { + String mention = PassageContexts.substring(start+1,last+1); + if(!GeneMentionLocationGNR.containsKey(j+"\t"+start) && !GeneMentionLocationGNR.containsKey(j+"\t"+last)) + { + if(!GNormPlus.BioCDocobj.Annotations.get(i).get(j).contains(start+"\t"+last+"\t"+mention+"\tGene")) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\t"+type); + } + } + gmtmp = gmtmp.replaceAll(".", "X"); + PassageContexts_tmp=pre+""+gmtmp+""+post; + mtmp = ptmp.matcher(PassageContexts_tmp); + } + } + } + } + } + } + } + } + GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,false,false); //save in BioC file + } +} + + diff --git a/src_Java/GNormPluslib/GNormPlus.java b/src_Java/GNormPluslib/GNormPlus.java index 1b9ce98a8842e7b32decbbdc81160b2f6b2db1c0..22a6f73d791e28586355908b7d55b1f5b31fbda0 100644 --- a/src_Java/GNormPluslib/GNormPlus.java +++ b/src_Java/GNormPluslib/GNormPlus.java @@ -1,696 +1,696 @@ -package GNormPluslib; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import javax.xml.stream.XMLStreamException; - -import GNormPluslib.PrefixTree; -import GNormPluslib.GNR; -import GNormPluslib.SR; - -public class GNormPlus -{ - public static BioCDoc BioCDocobj = new BioCDoc(); - public static PrefixTree PT_Species = new PrefixTree(); - public static PrefixTree PT_Cell = new PrefixTree(); - public static PrefixTree PT_CTDGene = new PrefixTree(); - public static PrefixTree PT_Gene = new PrefixTree(); - public static PrefixTree PT_GeneChromosome = new PrefixTree(); - public static PrefixTree PT_FamilyName = new PrefixTree(); - public static HashMap ent_hash = new HashMap(); - public static HashMap GenusID_hash = new HashMap(); - public static HashMap PrefixID_hash = new HashMap(); - public static HashMap TaxFreq_hash = new HashMap(); - public static HashMap GeneScoring_hash = new HashMap(); - public static HashMap GeneScoringDF_hash = new HashMap(); - public static HashMap GeneIDs_hash = new HashMap(); - public static HashMap Normalization2Protein_hash = new HashMap(); - public static HashMap HomologeneID_hash = new HashMap(); - public static HashMap SuffixTranslationMap_hash = new HashMap(); - public static HashMap SuffixTranslationMap2_hash = new HashMap(); - public static HashMap Pmid2Abb_hash = new HashMap(); - public static HashMap PmidAbb2LF_lc_hash = new HashMap(); - public static HashMap PmidLF2Abb_lc_hash = new HashMap(); - public static HashMap PmidAbb2LF_hash = new HashMap(); - public static HashMap PmidLF2Abb_hash = new HashMap(); - public static HashMap Pmid2ChromosomeGene_hash = new HashMap(); - public static HashMap SimConceptMention2Type_hash = new HashMap(); - public static HashMap Filtering_hash = new HashMap(); - public static HashMap Filtering_WithLongForm_hash = new HashMap(); - public static HashMap SP_Virus2Human_hash = new HashMap(); - public static HashMap GeneWithoutSPPrefix_hash = new HashMap(); - public static ArrayList taxid4gene = new ArrayList (); - public static HashMap setup_hash = new HashMap(); - public static HashMap suffixprefix_orig2modified = new HashMap(); - public static HashMap Abb2Longformtok_hash = new HashMap(); - public static HashMap StrainID_ancestor2tax_hash = new HashMap(); - public static HashMap StrainID_taxid2names_hash = new HashMap(); - - public static String SetupFile = "setup.txt"; - public static void main(String [] args) throws IOException, InterruptedException, XMLStreamException, SQLException - { - String InputFolder="input"; - String OutputFolder="output"; - String tmpFolder="tmp"; - String FocusSpecies = ""; - if(args.length<2) - { - System.out.println("\n$ java -Xmx30G -Xms10G -jar GNormPlus.jar [InputFolder] [OutputFolder] [SetupFile]"); - System.out.println("[InputFolder] Default : input"); - System.out.println("[OutputFolder] Default : output"); - System.out.println("[SetupFile] Default : setup.txt\n\n"); - } - else - { - /* - * Parameters - */ - InputFolder=args[0]; - OutputFolder=args[1]; - if(args.length>=3) - { - SetupFile = args[2]; - } - if(args.length>=4) - { - FocusSpecies=args[3]; - } - } - - BufferedReader br = new BufferedReader(new FileReader(SetupFile)); - String line=""; - Pattern ptmp = Pattern.compile("^ ([A-Za-z0-9]+) = ([^ \\t\\n\\r]+)$"); - while ((line = br.readLine()) != null) - { - Matcher mtmp = ptmp.matcher(line); - if(mtmp.find()) - { - setup_hash.put(mtmp.group(1), mtmp.group(2)); - } - } - br.close(); - if(!setup_hash.containsKey("GeneIDMatch")) - { - setup_hash.put("GeneIDMatch","True"); - } - if(!setup_hash.containsKey("HomologeneID")) - { - setup_hash.put("HomologeneID","False"); - } - if(!FocusSpecies.equals("")) - { - setup_hash.put("FocusSpecies",FocusSpecies); - } - if(!setup_hash.containsKey("ShowUnNormalizedMention")) - { - setup_hash.put("ShowUnNormalizedMention","False"); - } - if(setup_hash.containsKey("tmpFolder")) - { - tmpFolder=setup_hash.get("tmpFolder"); - } - - /* - * Time stamp - start : All - */ - double startTime,endTime,totTime; - startTime = System.currentTimeMillis();//start time - - int NumFiles=0; - File folder = new File(InputFolder); - File[] listOfFiles = folder.listFiles(); - for (int i = 0; i < listOfFiles.length; i++) - { - if (listOfFiles[i].isFile()) - { - String InputFile = listOfFiles[i].getName(); - File f = new File(OutputFolder+"/"+InputFile); - if(f.exists() && !f.isDirectory()) - { - } - else - { - NumFiles++; - } - } - } - - System.out.println("Total "+NumFiles+" file(s) wait(s) for process."); - - if(NumFiles>0) - { - /* - * Start & Load Dictionary - */ - String TrainTest = "Test"; - if(setup_hash.containsKey("TrainTest")) - { - TrainTest = setup_hash.get("TrainTest"); - } - - - /** Load Dictionary */ - if(setup_hash.containsKey("GeneRecognition") && setup_hash.get("GeneRecognition").toLowerCase().equals("true")) - { - System.out.print("Loading Gene NER Dictionary : Processing ... \r"); - /** CTDGene */ - if(setup_hash.containsKey("IgnoreNER") && setup_hash.get("IgnoreNER").toLowerCase().equals("true")){} // not NER (entities are pre-annotated) - else if(setup_hash.containsKey("SpeciesAssignmentOnly") && setup_hash.get("SpeciesAssignmentOnly").toLowerCase().equals("true")) {} // species assignment - else - { - PT_CTDGene.TreeFile2Tree(setup_hash.get("DictionaryFolder")+"/PT_CTDGene.txt"); - } - /** ent */ - br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/ent.rev.txt")); - line=""; - while ((line = br.readLine()) != null) - { - String l[]=line.split("\t"); //Α Alpha - ent_hash.put(l[0], l[1]); - } - br.close(); - - /** FamilyName */ - if((!setup_hash.containsKey("IgnoreNER")) || setup_hash.get("IgnoreNER").toLowerCase() != "true") - { - PT_FamilyName.TreeFile2Tree(setup_hash.get("DictionaryFolder")+"/PT_FamilyName.txt"); - } - - /** GeneChromosome */ - //PT_GeneChromosome.TreeFile2Tree(setup_hash.get("DictionaryFolder")+"/PT_GeneChromosome.txt"); - System.out.println("Loading Gene NER Dictionary : Processing ... done."); - } - - if(setup_hash.containsKey("SpeciesRecognition") && setup_hash.get("SpeciesRecognition").toLowerCase().equals("true")) - { - System.out.print("Loading Species NER Dictionary : Processing ... \r"); - /** Species */ - PT_Species.TreeFile2Tree(setup_hash.get("DictionaryFolder")+"/PT_Species.txt"); - - /** Cell */ - PT_Cell.TreeFile2Tree(setup_hash.get("DictionaryFolder")+"/PT_Cell.txt"); - - /** Genus */ - br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/SPGenus.txt")); - line=""; - while ((line = br.readLine()) != null) - { - String l[]=line.split("\t"); - GenusID_hash.put(l[0], l[1]); // tax id -> Genus - } - br.close(); - - /** taxid4gene */ - br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/tax4gene.txt")); - line=""; - while ((line = br.readLine()) != null) - { - taxid4gene.add(line); // tax id -> Genus - } - br.close(); - System.out.println("Loading Species NER Dictionary : Processing ... done."); - - } - - if(setup_hash.containsKey("SpeciesAssignment") && setup_hash.get("SpeciesAssignment").toLowerCase().equals("true")) - { - System.out.print("Loading Species Assignment Dictionary : Processing ... \r"); - /** GeneWithoutSPPrefix */ - br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/GeneWithoutSPPrefix.txt")); - line=""; - while ((line = br.readLine()) != null) - { - GeneWithoutSPPrefix_hash.put(line, ""); - } - br.close(); - - /** Prefix */ - br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/SPPrefix.txt")); - line=""; - while ((line = br.readLine()) != null) - { - String l[]=line.split("\t"); - PrefixID_hash.put(l[0], l[1]); //tax id -> prefix - } - br.close(); - PrefixID_hash.put("9606", "h"); - PrefixID_hash.put("10090", "m"); - PrefixID_hash.put("10116", "r"); - PrefixID_hash.put("4932", "y"); - PrefixID_hash.put("7227", "d"); - PrefixID_hash.put("7955", "z|dr|Dr|Zf|zf"); - PrefixID_hash.put("3702", "at|At"); - - /** Frequency */ - br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/taxonomy_freq.txt")); - line=""; - while ((line = br.readLine()) != null) - { - String l[]=line.split("\t"); - TaxFreq_hash.put(l[0], Double.parseDouble(l[1])/200000000); //tax id -> prefix - } - br.close(); - - /** SP_Virus2Human_hash */ - br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/SP_Virus2HumanList.txt")); - line=""; - while ((line = br.readLine()) != null) - { - SP_Virus2Human_hash.put(line,"9606"); - } - br.close(); - - /** SPStrain */ - /* - br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/SPStrain.txt")); - line=""; - while ((line = br.readLine()) != null) - { - String l[]=line.split("\t"); - String ancestor_id = l[0]; - String tax_id = l[1]; - String tax_names = l[2]; - StrainID_ancestor2tax_hash.put(ancestor_id, tax_id); // ancestor -> tax_id - StrainID_taxid2names_hash.put(tax_id, tax_names); // tax id -> strain - } - br.close(); - */ - System.out.println("Loading Species Assignment Dictionary : Processing ... done."); - - } - - if(setup_hash.containsKey("GeneNormalization") && setup_hash.get("GeneNormalization").toLowerCase().equals("true")) - { - System.out.print("Loading Gene normalization Dictionary : Processing ... \r"); - /** gene_prefix & gene_suffix */ - br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/PrefixSuffix.txt")); - line=""; - while ((line = br.readLine()) != null) - { - String l[]=line.split("\t"); - String org=l[0]; - String mod=l[1]; - suffixprefix_orig2modified.put(org,mod); - } - br.close(); - - /** gene_prefix & gene_suffix */ - br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/NonGeneAbbr.txt")); - line=""; - while ((line = br.readLine()) != null) - { - String l[]=line.split("\t"); - String shortform=l[0]; - String longform_toks=l[1]; - Abb2Longformtok_hash.put(shortform,longform_toks); - } - br.close(); - - /** SimConcept.MentionType */ - br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/SimConcept.MentionType.txt")); - line=""; - while ((line = br.readLine()) != null) - { - String l[]=line.split("\t"); - SimConceptMention2Type_hash.put(l[0], l[1]); - } - br.close(); - - /** Filtering */ - br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/Filtering.txt")); - line=""; - while ((line = br.readLine()) != null) - { - Filtering_hash.put(line, ""); - } - br.close(); - - /** Filtering_WithLongForm.txt */ - br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/Filtering_WithLongForm.txt")); - line=""; - while ((line = br.readLine()) != null) - { - String l[]=line.split("\t"); - Filtering_WithLongForm_hash.put(l[0], l[1]); - } - br.close(); - - /** Gene Dictionary */ - if(setup_hash.containsKey("FocusSpecies") && !setup_hash.get("FocusSpecies").equals("All")) - { - PT_Gene.TreeFile2Tree(setup_hash.get("DictionaryFolder")+"/PT_Gene."+setup_hash.get("FocusSpecies")+".txt"); - } - else if((!FocusSpecies.equals("")) && (!FocusSpecies.equals("All"))) - { - PT_Gene.TreeFile2Tree(setup_hash.get("DictionaryFolder")+"/PT_Gene."+FocusSpecies+".txt"); - } - else - { - PT_Gene.TreeFile2Tree(setup_hash.get("DictionaryFolder")+"/PT_Gene.txt"); - } - - /** GeneScoring */ - String FileName=setup_hash.get("DictionaryFolder")+"/GeneScoring.txt"; - - if(setup_hash.containsKey("FocusSpecies") && !setup_hash.get("FocusSpecies").equals("All")) - { - FileName = setup_hash.get("DictionaryFolder")+"/GeneScoring."+setup_hash.get("FocusSpecies")+".txt"; - } - else if((!FocusSpecies.equals("")) && (!FocusSpecies.equals("All"))) - { - FileName = setup_hash.get("DictionaryFolder")+"/GeneScoring."+FocusSpecies+".txt"; - } - br = new BufferedReader(new FileReader(FileName)); - line=""; - while ((line = br.readLine()) != null) - { - String l[]=line.split("\t"); - GeneScoring_hash.put(l[0], l[1]+"\t"+l[2]+"\t"+l[3]+"\t"+l[4]+"\t"+l[5]+"\t"+l[6]); - } - br.close(); - - /** GeneScoring.DF */ - FileName=setup_hash.get("DictionaryFolder")+"/GeneScoring.DF.txt"; - if(setup_hash.containsKey("FocusSpecies") && !setup_hash.get("FocusSpecies").equals("All")) - { - FileName = setup_hash.get("DictionaryFolder")+"/GeneScoring.DF."+setup_hash.get("FocusSpecies")+".txt"; - } - else if((!FocusSpecies.equals("")) && (!FocusSpecies.equals("All"))) - { - FileName = setup_hash.get("DictionaryFolder")+"/GeneScoring.DF."+FocusSpecies+".txt"; - } - br = new BufferedReader(new FileReader(FileName)); - double Sum = Double.parseDouble(br.readLine()); - while ((line = br.readLine()) != null) - { - String l[]=line.split("\t"); - // token -> idf - GeneScoringDF_hash.put(l[0], Math.log10(Sum/Double.parseDouble(l[1]))); - } - br.close(); - - /** Suffix Translation */ - SuffixTranslationMap_hash.put("alpha","a"); - SuffixTranslationMap_hash.put("a","alpha"); - SuffixTranslationMap_hash.put("beta","b"); - SuffixTranslationMap_hash.put("b","beta"); - SuffixTranslationMap_hash.put("delta","d"); - SuffixTranslationMap_hash.put("d","delta"); - SuffixTranslationMap_hash.put("z","zeta"); - SuffixTranslationMap_hash.put("zeta","z"); - SuffixTranslationMap_hash.put("gamma","g"); - SuffixTranslationMap_hash.put("g","gamma"); - SuffixTranslationMap_hash.put("r","gamma"); - SuffixTranslationMap_hash.put("y","gamma"); - - SuffixTranslationMap2_hash.put("2","ii"); - SuffixTranslationMap2_hash.put("ii","2"); - SuffixTranslationMap2_hash.put("II","2"); - SuffixTranslationMap2_hash.put("1","i"); - SuffixTranslationMap2_hash.put("i","1"); - SuffixTranslationMap2_hash.put("I","1"); - - /** GeneID */ - if(setup_hash.containsKey("GeneIDMatch") && setup_hash.get("GeneIDMatch").toLowerCase().equals("true")) - { - br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/GeneIDs.txt")); - line=""; - while ((line = br.readLine()) != null) - { - String l[]=line.split("\t"); - GeneIDs_hash.put(l[0],l[1]); - } - br.close(); - } - - /** Normalization2Protein */ - if(setup_hash.containsKey("Normalization2Protein") && setup_hash.get("Normalization2Protein").toLowerCase().equals("true")) - { - br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/Gene2Protein.txt")); - line=""; - while ((line = br.readLine()) != null) - { - String l[]=line.split("\t"); - Normalization2Protein_hash.put(l[0],l[1]); - } - br.close(); - } - - /** HomologeneID */ - if(setup_hash.containsKey("HomologeneID") && setup_hash.get("HomologeneID").toLowerCase().equals("true")) - { - br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/Gene2Homoid.txt")); - line=""; - while ((line = br.readLine()) != null) - { - String l[]=line.split("\t"); - HomologeneID_hash.put(l[0],l[1]); - } - br.close(); - } - System.out.println("Loading Gene normalization Dictionary : Processing ... done."); - } - - endTime = System.currentTimeMillis(); - totTime = endTime - startTime; - System.out.println("Loading Dictionary : Processing Time:"+totTime/1000+"sec"); - - folder = new File(InputFolder); - listOfFiles = folder.listFiles(); - for (int i = 0; i < listOfFiles.length; i++) - { - if (listOfFiles[i].isFile()) - { - String InputFile = listOfFiles[i].getName(); - File f = new File(OutputFolder+"/"+InputFile); - if(f.exists() && !f.isDirectory()) - { - System.out.println(InputFolder+"/"+InputFile+" - Done. (The output file exists in output folder)"); - } - else - { - String path=tmpFolder; - File file = new File(path); - File[] files = file.listFiles(); - for (File ftmp:files) - { - if (ftmp.isFile() && ftmp.exists()) - { - if(ftmp.toString().matches(tmpFolder+"/"+InputFile+".*")) - { - ftmp.delete(); - } - } - } - - BioCDocobj = new BioCDoc(); - - /* - * Format Check - */ - String Format = ""; - String checkR = BioCDocobj.BioCFormatCheck(InputFolder+"/"+InputFile); - if(checkR.equals("BioC")) - { - Format = "BioC"; - } - else if(checkR.equals("PubTator")) - { - Format = "PubTator"; - } - else - { - System.out.println(checkR); - System.exit(0); - } - - System.out.print(InputFolder+"/"+InputFile+" - ("+Format+" format) : Processing ... \r"); - - /** PubTator2BioC*/ - if(Format.equals("PubTator")) - { - BioCDocobj.PubTator2BioC(InputFolder+"/"+InputFile,tmpFolder+"/"+InputFile); - } - else - { - br = new BufferedReader(new FileReader(InputFolder+"/"+InputFile)); - BufferedWriter fr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(tmpFolder+"/"+InputFile), "UTF-8")); - line=""; - while ((line = br.readLine()) != null) - { - fr.write(line); - } - br.close(); - fr.close(); - } - - /** load file */ - GNR GNRobj = new GNR(); - GNRobj.LoadInputFile(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".Abb",TrainTest); - SR SRobj = new SR(); - SimConcept SCobj = new SimConcept(); - GN GNobj = new GN(); - String FinalStep=""; - - /** SpeciesRecognition */ - if(setup_hash.containsKey("SpeciesRecognition") && setup_hash.get("SpeciesRecognition").toLowerCase().equals("true") ) // pre-annotated name entities - { - SRobj.SpeciesRecognition(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".SR.xml",setup_hash.get("DictionaryFolder")+"/SPStrain.txt",setup_hash.get("FilterAntibody")); - FinalStep="SpeciesRecognition"; - } - - /** GeneRecognition */ - if( setup_hash.containsKey("GeneRecognition") && setup_hash.get("GeneRecognition").toLowerCase().equals("true") ) - { - GNRobj.FeatureExtraction(tmpFolder+"/"+InputFile+".data",tmpFolder+"/"+InputFile+".loca",TrainTest); - GNRobj.CRF_test(setup_hash.get("GNRModel"),tmpFolder+"/"+InputFile+".data",tmpFolder+"/"+InputFile+".output","top3"); //top3 - GNRobj.ReadCRFresult(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".loca",tmpFolder+"/"+InputFile+".output",tmpFolder+"/"+InputFile+".GNR.xml",0.005,0.05); //0.005,0.05 - f = new File(tmpFolder+"/"+InputFile+".SR.xml"); - if(f.exists()) - { - GNRobj.PostProcessing(tmpFolder+"/"+InputFile+".SR.xml",tmpFolder+"/"+InputFile+".GNR.xml"); - } - else - { - GNRobj.PostProcessing(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".GNR.xml"); - } - FinalStep="GeneRecognition"; - } - - /** SpeciesAssignment */ - if(setup_hash.containsKey("SpeciesAssignment") && setup_hash.get("SpeciesAssignment").toLowerCase().equals("true") ) // pre-annotated name entities - { - if(setup_hash.containsKey("FocusSpecies") && !setup_hash.get("FocusSpecies").equals("All")) // FocusSpecies - { - f = new File(tmpFolder+"/"+InputFile+".GNR.xml"); - if(f.exists()) - { - SRobj.SpeciesAssignment(tmpFolder+"/"+InputFile+".GNR.xml",tmpFolder+"/"+InputFile+".SA.xml",setup_hash.get("FocusSpecies")); - } - else - { - SRobj.SpeciesAssignment(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".SA.xml",setup_hash.get("FocusSpecies")); - } - } - else// All Species - { - f = new File(tmpFolder+"/"+InputFile+".GNR.xml"); - if(f.exists()) - { - SRobj.SpeciesAssignment(tmpFolder+"/"+InputFile+".GNR.xml",tmpFolder+"/"+InputFile+".SA.xml"); - } - else - { - SRobj.SpeciesAssignment(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".SA.xml"); - } - } - FinalStep="SpeciesAssignment"; - } - - /** GeneNormalization */ - if((setup_hash.containsKey("GeneNormalization")) && setup_hash.get("GeneNormalization").toLowerCase().equals("true") ) - { - /** SimConcept */ - { - SCobj.FeatureExtraction_Test(tmpFolder+"/"+InputFile+".SC.data"); - SCobj.CRF_test(setup_hash.get("SCModel"),tmpFolder+"/"+InputFile+".SC.data",tmpFolder+"/"+InputFile+".SC.output"); - SCobj.ReadCRFresult(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".SC.output",tmpFolder+"/"+InputFile+".SC.xml"); - } - - /** GeneNormalization */ - { - GNobj.PreProcessing4GN(InputFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".PreProcessing4GN.xml"); - GNobj.ChromosomeRecognition(InputFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".GN.xml"); - if(setup_hash.containsKey("GeneIDMatch") && setup_hash.get("GeneIDMatch").equals("True")) - { - - GNobj.GeneNormalization(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".GN.xml",true); - GNobj.GeneIDRecognition(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".GN.xml"); - } - else - { - GNobj.GeneNormalization(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".GN.xml",false); - } - } - FinalStep="GeneNormalization"; - } - - /** BioC2PubTator*/ - String final_output=""; - if(FinalStep.equals("GeneNormalization")) - { - final_output=tmpFolder+"/"+InputFile+".GN.xml"; - } - else if(FinalStep.equals("SpeciesAssignment")) - { - final_output=tmpFolder+"/"+InputFile+".SA.xml"; - } - else if(FinalStep.equals("SpeciesRecognition")) - { - final_output=tmpFolder+"/"+InputFile+".SR.xml"; - } - else if(FinalStep.equals("GeneRecognition")) - { - final_output=tmpFolder+"/"+InputFile+".GNR.xml"; - } - - if(Format.equals("PubTator")) - { - BioCDocobj.BioC2PubTator(final_output,OutputFolder+"/"+InputFile); - } - else - { - br = new BufferedReader(new FileReader(final_output)); - BufferedWriter fr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(OutputFolder+"/"+InputFile), "UTF-8")); - line=""; - while ((line = br.readLine()) != null) - { - fr.write(line); - } - br.close(); - fr.close(); - } - - /* - * remove tmp files - */ - if((!setup_hash.containsKey("DeleteTmp")) || setup_hash.get("DeleteTmp").toLowerCase().equals("true")) - { - path="tmp"; - file = new File(path); - files = file.listFiles(); - for (File ftmp:files) - { - if (ftmp.isFile() && ftmp.exists()) - { - if(ftmp.toString().matches(tmpFolder+"/"+InputFile+".*")) - { - ftmp.delete(); - } - } - } - } - - /* - * Time stamp - last - */ - endTime = System.currentTimeMillis(); - totTime = endTime - startTime; - System.out.println(InputFolder+"/"+InputFile+" - ("+Format+" format) : Processing Time:"+totTime/1000+"sec"); - } - } - } - } - } -} +package GNormPluslib; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.stream.XMLStreamException; + +import GNormPluslib.PrefixTree; +import GNormPluslib.GNR; +import GNormPluslib.SR; + +public class GNormPlus +{ + public static BioCDoc BioCDocobj = new BioCDoc(); + public static PrefixTree PT_Species = new PrefixTree(); + public static PrefixTree PT_Cell = new PrefixTree(); + public static PrefixTree PT_CTDGene = new PrefixTree(); + public static PrefixTree PT_Gene = new PrefixTree(); + public static PrefixTree PT_GeneChromosome = new PrefixTree(); + public static PrefixTree PT_FamilyName = new PrefixTree(); + public static HashMap ent_hash = new HashMap(); + public static HashMap GenusID_hash = new HashMap(); + public static HashMap PrefixID_hash = new HashMap(); + public static HashMap TaxFreq_hash = new HashMap(); + public static HashMap GeneScoring_hash = new HashMap(); + public static HashMap GeneScoringDF_hash = new HashMap(); + public static HashMap GeneIDs_hash = new HashMap(); + public static HashMap Normalization2Protein_hash = new HashMap(); + public static HashMap HomologeneID_hash = new HashMap(); + public static HashMap SuffixTranslationMap_hash = new HashMap(); + public static HashMap SuffixTranslationMap2_hash = new HashMap(); + public static HashMap Pmid2Abb_hash = new HashMap(); + public static HashMap PmidAbb2LF_lc_hash = new HashMap(); + public static HashMap PmidLF2Abb_lc_hash = new HashMap(); + public static HashMap PmidAbb2LF_hash = new HashMap(); + public static HashMap PmidLF2Abb_hash = new HashMap(); + public static HashMap Pmid2ChromosomeGene_hash = new HashMap(); + public static HashMap SimConceptMention2Type_hash = new HashMap(); + public static HashMap Filtering_hash = new HashMap(); + public static HashMap Filtering_WithLongForm_hash = new HashMap(); + public static HashMap SP_Virus2Human_hash = new HashMap(); + public static HashMap GeneWithoutSPPrefix_hash = new HashMap(); + public static ArrayList taxid4gene = new ArrayList (); + public static HashMap setup_hash = new HashMap(); + public static HashMap suffixprefix_orig2modified = new HashMap(); + public static HashMap Abb2Longformtok_hash = new HashMap(); + public static HashMap StrainID_ancestor2tax_hash = new HashMap(); + public static HashMap StrainID_taxid2names_hash = new HashMap(); + + public static String SetupFile = "setup.txt"; + public static void main(String [] args) throws IOException, InterruptedException, XMLStreamException, SQLException + { + String InputFolder="input"; + String OutputFolder="output"; + String tmpFolder="tmp"; + String FocusSpecies = ""; + if(args.length<2) + { + System.out.println("\n$ java -Xmx30G -Xms10G -jar GNormPlus.jar [InputFolder] [OutputFolder] [SetupFile]"); + System.out.println("[InputFolder] Default : input"); + System.out.println("[OutputFolder] Default : output"); + System.out.println("[SetupFile] Default : setup.txt\n\n"); + } + else + { + /* + * Parameters + */ + InputFolder=args[0]; + OutputFolder=args[1]; + if(args.length>=3) + { + SetupFile = args[2]; + } + if(args.length>=4) + { + FocusSpecies=args[3]; + } + } + + BufferedReader br = new BufferedReader(new FileReader(SetupFile)); + String line=""; + Pattern ptmp = Pattern.compile("^ ([A-Za-z0-9]+) = ([^ \\t\\n\\r]+)$"); + while ((line = br.readLine()) != null) + { + Matcher mtmp = ptmp.matcher(line); + if(mtmp.find()) + { + setup_hash.put(mtmp.group(1), mtmp.group(2)); + } + } + br.close(); + if(!setup_hash.containsKey("GeneIDMatch")) + { + setup_hash.put("GeneIDMatch","True"); + } + if(!setup_hash.containsKey("HomologeneID")) + { + setup_hash.put("HomologeneID","False"); + } + if(!FocusSpecies.equals("")) + { + setup_hash.put("FocusSpecies",FocusSpecies); + } + if(!setup_hash.containsKey("ShowUnNormalizedMention")) + { + setup_hash.put("ShowUnNormalizedMention","False"); + } + if(setup_hash.containsKey("tmpFolder")) + { + tmpFolder=setup_hash.get("tmpFolder"); + } + + /* + * Time stamp - start : All + */ + double startTime,endTime,totTime; + startTime = System.currentTimeMillis();//start time + + int NumFiles=0; + File folder = new File(InputFolder); + File[] listOfFiles = folder.listFiles(); + for (int i = 0; i < listOfFiles.length; i++) + { + if (listOfFiles[i].isFile()) + { + String InputFile = listOfFiles[i].getName(); + File f = new File(OutputFolder+"/"+InputFile); + if(f.exists() && !f.isDirectory()) + { + } + else + { + NumFiles++; + } + } + } + + System.out.println("Total "+NumFiles+" file(s) wait(s) for process."); + + if(NumFiles>0) + { + /* + * Start & Load Dictionary + */ + String TrainTest = "Test"; + if(setup_hash.containsKey("TrainTest")) + { + TrainTest = setup_hash.get("TrainTest"); + } + + + /** Load Dictionary */ + if(setup_hash.containsKey("GeneRecognition") && setup_hash.get("GeneRecognition").toLowerCase().equals("true")) + { + System.out.print("Loading Gene NER Dictionary : Processing ... \r"); + /** CTDGene */ + if(setup_hash.containsKey("IgnoreNER") && setup_hash.get("IgnoreNER").toLowerCase().equals("true")){} // not NER (entities are pre-annotated) + else if(setup_hash.containsKey("SpeciesAssignmentOnly") && setup_hash.get("SpeciesAssignmentOnly").toLowerCase().equals("true")) {} // species assignment + else + { + PT_CTDGene.TreeFile2Tree(setup_hash.get("DictionaryFolder")+"/PT_CTDGene.txt"); + } + /** ent */ + br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/ent.rev.txt")); + line=""; + while ((line = br.readLine()) != null) + { + String l[]=line.split("\t"); //Α Alpha + ent_hash.put(l[0], l[1]); + } + br.close(); + + /** FamilyName */ + if((!setup_hash.containsKey("IgnoreNER")) || setup_hash.get("IgnoreNER").toLowerCase() != "true") + { + PT_FamilyName.TreeFile2Tree(setup_hash.get("DictionaryFolder")+"/PT_FamilyName.txt"); + } + + /** GeneChromosome */ + //PT_GeneChromosome.TreeFile2Tree(setup_hash.get("DictionaryFolder")+"/PT_GeneChromosome.txt"); + System.out.println("Loading Gene NER Dictionary : Processing ... done."); + } + + if(setup_hash.containsKey("SpeciesRecognition") && setup_hash.get("SpeciesRecognition").toLowerCase().equals("true")) + { + System.out.print("Loading Species NER Dictionary : Processing ... \r"); + /** Species */ + PT_Species.TreeFile2Tree(setup_hash.get("DictionaryFolder")+"/PT_Species.txt"); + + /** Cell */ + PT_Cell.TreeFile2Tree(setup_hash.get("DictionaryFolder")+"/PT_Cell.txt"); + + /** Genus */ + br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/SPGenus.txt")); + line=""; + while ((line = br.readLine()) != null) + { + String l[]=line.split("\t"); + GenusID_hash.put(l[0], l[1]); // tax id -> Genus + } + br.close(); + + /** taxid4gene */ + br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/tax4gene.txt")); + line=""; + while ((line = br.readLine()) != null) + { + taxid4gene.add(line); // tax id -> Genus + } + br.close(); + System.out.println("Loading Species NER Dictionary : Processing ... done."); + + } + + if(setup_hash.containsKey("SpeciesAssignment") && setup_hash.get("SpeciesAssignment").toLowerCase().equals("true")) + { + System.out.print("Loading Species Assignment Dictionary : Processing ... \r"); + /** GeneWithoutSPPrefix */ + br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/GeneWithoutSPPrefix.txt")); + line=""; + while ((line = br.readLine()) != null) + { + GeneWithoutSPPrefix_hash.put(line, ""); + } + br.close(); + + /** Prefix */ + br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/SPPrefix.txt")); + line=""; + while ((line = br.readLine()) != null) + { + String l[]=line.split("\t"); + PrefixID_hash.put(l[0], l[1]); //tax id -> prefix + } + br.close(); + PrefixID_hash.put("9606", "h"); + PrefixID_hash.put("10090", "m"); + PrefixID_hash.put("10116", "r"); + PrefixID_hash.put("4932", "y"); + PrefixID_hash.put("7227", "d"); + PrefixID_hash.put("7955", "z|dr|Dr|Zf|zf"); + PrefixID_hash.put("3702", "at|At"); + + /** Frequency */ + br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/taxonomy_freq.txt")); + line=""; + while ((line = br.readLine()) != null) + { + String l[]=line.split("\t"); + TaxFreq_hash.put(l[0], Double.parseDouble(l[1])/200000000); //tax id -> prefix + } + br.close(); + + /** SP_Virus2Human_hash */ + br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/SP_Virus2HumanList.txt")); + line=""; + while ((line = br.readLine()) != null) + { + SP_Virus2Human_hash.put(line,"9606"); + } + br.close(); + + /** SPStrain */ + /* + br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/SPStrain.txt")); + line=""; + while ((line = br.readLine()) != null) + { + String l[]=line.split("\t"); + String ancestor_id = l[0]; + String tax_id = l[1]; + String tax_names = l[2]; + StrainID_ancestor2tax_hash.put(ancestor_id, tax_id); // ancestor -> tax_id + StrainID_taxid2names_hash.put(tax_id, tax_names); // tax id -> strain + } + br.close(); + */ + System.out.println("Loading Species Assignment Dictionary : Processing ... done."); + + } + + if(setup_hash.containsKey("GeneNormalization") && setup_hash.get("GeneNormalization").toLowerCase().equals("true")) + { + System.out.print("Loading Gene normalization Dictionary : Processing ... \r"); + /** gene_prefix & gene_suffix */ + br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/PrefixSuffix.txt")); + line=""; + while ((line = br.readLine()) != null) + { + String l[]=line.split("\t"); + String org=l[0]; + String mod=l[1]; + suffixprefix_orig2modified.put(org,mod); + } + br.close(); + + /** gene_prefix & gene_suffix */ + br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/NonGeneAbbr.txt")); + line=""; + while ((line = br.readLine()) != null) + { + String l[]=line.split("\t"); + String shortform=l[0]; + String longform_toks=l[1]; + Abb2Longformtok_hash.put(shortform,longform_toks); + } + br.close(); + + /** SimConcept.MentionType */ + br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/SimConcept.MentionType.txt")); + line=""; + while ((line = br.readLine()) != null) + { + String l[]=line.split("\t"); + SimConceptMention2Type_hash.put(l[0], l[1]); + } + br.close(); + + /** Filtering */ + br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/Filtering.txt")); + line=""; + while ((line = br.readLine()) != null) + { + Filtering_hash.put(line, ""); + } + br.close(); + + /** Filtering_WithLongForm.txt */ + br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/Filtering_WithLongForm.txt")); + line=""; + while ((line = br.readLine()) != null) + { + String l[]=line.split("\t"); + Filtering_WithLongForm_hash.put(l[0], l[1]); + } + br.close(); + + /** Gene Dictionary */ + if(setup_hash.containsKey("FocusSpecies") && !setup_hash.get("FocusSpecies").equals("All")) + { + PT_Gene.TreeFile2Tree(setup_hash.get("DictionaryFolder")+"/PT_Gene."+setup_hash.get("FocusSpecies")+".txt"); + } + else if((!FocusSpecies.equals("")) && (!FocusSpecies.equals("All"))) + { + PT_Gene.TreeFile2Tree(setup_hash.get("DictionaryFolder")+"/PT_Gene."+FocusSpecies+".txt"); + } + else + { + PT_Gene.TreeFile2Tree(setup_hash.get("DictionaryFolder")+"/PT_Gene.txt"); + } + + /** GeneScoring */ + String FileName=setup_hash.get("DictionaryFolder")+"/GeneScoring.txt"; + + if(setup_hash.containsKey("FocusSpecies") && !setup_hash.get("FocusSpecies").equals("All")) + { + FileName = setup_hash.get("DictionaryFolder")+"/GeneScoring."+setup_hash.get("FocusSpecies")+".txt"; + } + else if((!FocusSpecies.equals("")) && (!FocusSpecies.equals("All"))) + { + FileName = setup_hash.get("DictionaryFolder")+"/GeneScoring."+FocusSpecies+".txt"; + } + br = new BufferedReader(new FileReader(FileName)); + line=""; + while ((line = br.readLine()) != null) + { + String l[]=line.split("\t"); + GeneScoring_hash.put(l[0], l[1]+"\t"+l[2]+"\t"+l[3]+"\t"+l[4]+"\t"+l[5]+"\t"+l[6]); + } + br.close(); + + /** GeneScoring.DF */ + FileName=setup_hash.get("DictionaryFolder")+"/GeneScoring.DF.txt"; + if(setup_hash.containsKey("FocusSpecies") && !setup_hash.get("FocusSpecies").equals("All")) + { + FileName = setup_hash.get("DictionaryFolder")+"/GeneScoring.DF."+setup_hash.get("FocusSpecies")+".txt"; + } + else if((!FocusSpecies.equals("")) && (!FocusSpecies.equals("All"))) + { + FileName = setup_hash.get("DictionaryFolder")+"/GeneScoring.DF."+FocusSpecies+".txt"; + } + br = new BufferedReader(new FileReader(FileName)); + double Sum = Double.parseDouble(br.readLine()); + while ((line = br.readLine()) != null) + { + String l[]=line.split("\t"); + // token -> idf + GeneScoringDF_hash.put(l[0], Math.log10(Sum/Double.parseDouble(l[1]))); + } + br.close(); + + /** Suffix Translation */ + SuffixTranslationMap_hash.put("alpha","a"); + SuffixTranslationMap_hash.put("a","alpha"); + SuffixTranslationMap_hash.put("beta","b"); + SuffixTranslationMap_hash.put("b","beta"); + SuffixTranslationMap_hash.put("delta","d"); + SuffixTranslationMap_hash.put("d","delta"); + SuffixTranslationMap_hash.put("z","zeta"); + SuffixTranslationMap_hash.put("zeta","z"); + SuffixTranslationMap_hash.put("gamma","g"); + SuffixTranslationMap_hash.put("g","gamma"); + SuffixTranslationMap_hash.put("r","gamma"); + SuffixTranslationMap_hash.put("y","gamma"); + + SuffixTranslationMap2_hash.put("2","ii"); + SuffixTranslationMap2_hash.put("ii","2"); + SuffixTranslationMap2_hash.put("II","2"); + SuffixTranslationMap2_hash.put("1","i"); + SuffixTranslationMap2_hash.put("i","1"); + SuffixTranslationMap2_hash.put("I","1"); + + /** GeneID */ + if(setup_hash.containsKey("GeneIDMatch") && setup_hash.get("GeneIDMatch").toLowerCase().equals("true")) + { + br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/GeneIDs.txt")); + line=""; + while ((line = br.readLine()) != null) + { + String l[]=line.split("\t"); + GeneIDs_hash.put(l[0],l[1]); + } + br.close(); + } + + /** Normalization2Protein */ + if(setup_hash.containsKey("Normalization2Protein") && setup_hash.get("Normalization2Protein").toLowerCase().equals("true")) + { + br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/Gene2Protein.txt")); + line=""; + while ((line = br.readLine()) != null) + { + String l[]=line.split("\t"); + Normalization2Protein_hash.put(l[0],l[1]); + } + br.close(); + } + + /** HomologeneID */ + if(setup_hash.containsKey("HomologeneID") && setup_hash.get("HomologeneID").toLowerCase().equals("true")) + { + br = new BufferedReader(new FileReader(setup_hash.get("DictionaryFolder")+"/Gene2Homoid.txt")); + line=""; + while ((line = br.readLine()) != null) + { + String l[]=line.split("\t"); + HomologeneID_hash.put(l[0],l[1]); + } + br.close(); + } + System.out.println("Loading Gene normalization Dictionary : Processing ... done."); + } + + endTime = System.currentTimeMillis(); + totTime = endTime - startTime; + System.out.println("Loading Dictionary : Processing Time:"+totTime/1000+"sec"); + + folder = new File(InputFolder); + listOfFiles = folder.listFiles(); + for (int i = 0; i < listOfFiles.length; i++) + { + if (listOfFiles[i].isFile()) + { + String InputFile = listOfFiles[i].getName(); + File f = new File(OutputFolder+"/"+InputFile); + if(f.exists() && !f.isDirectory()) + { + System.out.println(InputFolder+"/"+InputFile+" - Done. (The output file exists in output folder)"); + } + else + { + String path=tmpFolder; + File file = new File(path); + File[] files = file.listFiles(); + for (File ftmp:files) + { + if (ftmp.isFile() && ftmp.exists()) + { + if(ftmp.toString().matches(tmpFolder+"/"+InputFile+".*")) + { + ftmp.delete(); + } + } + } + + BioCDocobj = new BioCDoc(); + + /* + * Format Check + */ + String Format = ""; + String checkR = BioCDocobj.BioCFormatCheck(InputFolder+"/"+InputFile); + if(checkR.equals("BioC")) + { + Format = "BioC"; + } + else if(checkR.equals("PubTator")) + { + Format = "PubTator"; + } + else + { + System.out.println(checkR); + System.exit(0); + } + + System.out.print(InputFolder+"/"+InputFile+" - ("+Format+" format) : Processing ... \r"); + + /** PubTator2BioC*/ + if(Format.equals("PubTator")) + { + BioCDocobj.PubTator2BioC(InputFolder+"/"+InputFile,tmpFolder+"/"+InputFile); + } + else + { + br = new BufferedReader(new FileReader(InputFolder+"/"+InputFile)); + BufferedWriter fr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(tmpFolder+"/"+InputFile), "UTF-8")); + line=""; + while ((line = br.readLine()) != null) + { + fr.write(line); + } + br.close(); + fr.close(); + } + + /** load file */ + GNR GNRobj = new GNR(); + GNRobj.LoadInputFile(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".Abb",TrainTest); + SR SRobj = new SR(); + SimConcept SCobj = new SimConcept(); + GN GNobj = new GN(); + String FinalStep=""; + + /** SpeciesRecognition */ + if(setup_hash.containsKey("SpeciesRecognition") && setup_hash.get("SpeciesRecognition").toLowerCase().equals("true") ) // pre-annotated name entities + { + SRobj.SpeciesRecognition(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".SR.xml",setup_hash.get("DictionaryFolder")+"/SPStrain.txt",setup_hash.get("FilterAntibody")); + FinalStep="SpeciesRecognition"; + } + + /** GeneRecognition */ + if( setup_hash.containsKey("GeneRecognition") && setup_hash.get("GeneRecognition").toLowerCase().equals("true") ) + { + GNRobj.FeatureExtraction(tmpFolder+"/"+InputFile+".data",tmpFolder+"/"+InputFile+".loca",TrainTest); + GNRobj.CRF_test(setup_hash.get("GNRModel"),tmpFolder+"/"+InputFile+".data",tmpFolder+"/"+InputFile+".output","top3"); //top3 + GNRobj.ReadCRFresult(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".loca",tmpFolder+"/"+InputFile+".output",tmpFolder+"/"+InputFile+".GNR.xml",0.005,0.05); //0.005,0.05 + f = new File(tmpFolder+"/"+InputFile+".SR.xml"); + if(f.exists()) + { + GNRobj.PostProcessing(tmpFolder+"/"+InputFile+".SR.xml",tmpFolder+"/"+InputFile+".GNR.xml"); + } + else + { + GNRobj.PostProcessing(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".GNR.xml"); + } + FinalStep="GeneRecognition"; + } + + /** SpeciesAssignment */ + if(setup_hash.containsKey("SpeciesAssignment") && setup_hash.get("SpeciesAssignment").toLowerCase().equals("true") ) // pre-annotated name entities + { + if(setup_hash.containsKey("FocusSpecies") && !setup_hash.get("FocusSpecies").equals("All")) // FocusSpecies + { + f = new File(tmpFolder+"/"+InputFile+".GNR.xml"); + if(f.exists()) + { + SRobj.SpeciesAssignment(tmpFolder+"/"+InputFile+".GNR.xml",tmpFolder+"/"+InputFile+".SA.xml",setup_hash.get("FocusSpecies")); + } + else + { + SRobj.SpeciesAssignment(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".SA.xml",setup_hash.get("FocusSpecies")); + } + } + else// All Species + { + f = new File(tmpFolder+"/"+InputFile+".GNR.xml"); + if(f.exists()) + { + SRobj.SpeciesAssignment(tmpFolder+"/"+InputFile+".GNR.xml",tmpFolder+"/"+InputFile+".SA.xml"); + } + else + { + SRobj.SpeciesAssignment(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".SA.xml"); + } + } + FinalStep="SpeciesAssignment"; + } + + /** GeneNormalization */ + if((setup_hash.containsKey("GeneNormalization")) && setup_hash.get("GeneNormalization").toLowerCase().equals("true") ) + { + /** SimConcept */ + { + SCobj.FeatureExtraction_Test(tmpFolder+"/"+InputFile+".SC.data"); + SCobj.CRF_test(setup_hash.get("SCModel"),tmpFolder+"/"+InputFile+".SC.data",tmpFolder+"/"+InputFile+".SC.output"); + SCobj.ReadCRFresult(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".SC.output",tmpFolder+"/"+InputFile+".SC.xml"); + } + + /** GeneNormalization */ + { + GNobj.PreProcessing4GN(InputFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".PreProcessing4GN.xml"); + GNobj.ChromosomeRecognition(InputFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".GN.xml"); + if(setup_hash.containsKey("GeneIDMatch") && setup_hash.get("GeneIDMatch").equals("True")) + { + + GNobj.GeneNormalization(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".GN.xml",true); + GNobj.GeneIDRecognition(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".GN.xml"); + } + else + { + GNobj.GeneNormalization(tmpFolder+"/"+InputFile,tmpFolder+"/"+InputFile+".GN.xml",false); + } + } + FinalStep="GeneNormalization"; + } + + /** BioC2PubTator*/ + String final_output=""; + if(FinalStep.equals("GeneNormalization")) + { + final_output=tmpFolder+"/"+InputFile+".GN.xml"; + } + else if(FinalStep.equals("SpeciesAssignment")) + { + final_output=tmpFolder+"/"+InputFile+".SA.xml"; + } + else if(FinalStep.equals("SpeciesRecognition")) + { + final_output=tmpFolder+"/"+InputFile+".SR.xml"; + } + else if(FinalStep.equals("GeneRecognition")) + { + final_output=tmpFolder+"/"+InputFile+".GNR.xml"; + } + + if(Format.equals("PubTator")) + { + BioCDocobj.BioC2PubTator(final_output,OutputFolder+"/"+InputFile); + } + else + { + br = new BufferedReader(new FileReader(final_output)); + BufferedWriter fr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(OutputFolder+"/"+InputFile), "UTF-8")); + line=""; + while ((line = br.readLine()) != null) + { + fr.write(line); + } + br.close(); + fr.close(); + } + + /* + * remove tmp files + */ + if((!setup_hash.containsKey("DeleteTmp")) || setup_hash.get("DeleteTmp").toLowerCase().equals("true")) + { + path="tmp"; + file = new File(path); + files = file.listFiles(); + for (File ftmp:files) + { + if (ftmp.isFile() && ftmp.exists()) + { + if(ftmp.toString().matches(tmpFolder+"/"+InputFile+".*")) + { + ftmp.delete(); + } + } + } + } + + /* + * Time stamp - last + */ + endTime = System.currentTimeMillis(); + totTime = endTime - startTime; + System.out.println(InputFolder+"/"+InputFile+" - ("+Format+" format) : Processing Time:"+totTime/1000+"sec"); + } + } + } + } + } +} diff --git a/src_Java/GNormPluslib/PrefixTree.java b/src_Java/GNormPluslib/PrefixTree.java index c75b0f8d2e611277889d35172d6f82f941365940..75eeb72c42e4775c18bb5d625f9be25fe8f71408 100644 --- a/src_Java/GNormPluslib/PrefixTree.java +++ b/src_Java/GNormPluslib/PrefixTree.java @@ -1,893 +1,893 @@ -/** - * Project: GNormPlus - * Function: Dictionary lookup by Prefix Tree - */ - -package GNormPluslib; - -import java.io.*; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public class PrefixTree -{ - private Tree Tr=new Tree(); - - /* - * Read Dictionary and insert Mention into the Prefix Tree - */ - public static HashMap StopWord_hash = new HashMap(); - - public void Hash2Tree(HashMap ID2Names) - { - for(String ID : ID2Names.keySet()) - { - String NameColumn[]=ID2Names.get(ID).split("\\|"); - for(int i=0;i1) - { - Column[0]=Column[0].replace("species:ncbi:",""); - Column[1]=Column[1].replaceAll(" strain=", " "); - Column[1]=Column[1].replaceAll("[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_]", " "); - Column[1]=Column[1].replaceAll("[\\(\\)]", " "); - String SpNameColumn[]=Column[1].split("\\|"); - for(int i=0;i=3 - ) - { - boolean stopword_boolean=false; - for(String stopword_RegEx : StopWord_hash.keySet()) - { - Pattern ptmp = Pattern.compile("^"+stopword_RegEx+"$"); - Matcher mtmp = ptmp.matcher(SpNameColumn[i].toLowerCase()); - if(mtmp.find()) - { - stopword_boolean=true; - } - } - if(stopword_boolean == false) - { - Tr.insertMention(SpNameColumn[i],Column[0]); - } - } - /* - * Criteria for Gene - */ - else if (MentionType.equals("Gene") && - (!SpNameColumn[i].substring(0, 1).matches("[\\W\\-\\_]")) && - tmp.length()>=3 - ) - { - if(!StopWord_hash.containsKey(SpNameColumn[i].toLowerCase())) - { - Tr.insertMention(SpNameColumn[i],Column[0]); - } - } - /* - * Criteria for Cell - */ - else if (MentionType.equals("Cell") && - (!SpNameColumn[i].substring(0, 1).matches("[\\W\\-\\_]")) && - tmp.length()>=3 - ) - { - if(!StopWord_hash.containsKey(SpNameColumn[i].toLowerCase())) - { - Tr.insertMention(SpNameColumn[i],Column[0]); - } - } - /* - * others - */ - else if ((!SpNameColumn[i].substring(0, 1).matches("[\\W\\-\\_]")) && - tmp.length()>=3 - ) - { - if(!StopWord_hash.containsKey(SpNameColumn[i].toLowerCase())) - { - Tr.insertMention(SpNameColumn[i],Column[0]); - } - } - } - } - } - inputfile.close(); - } - catch(IOException e1){ System.out.println("[Dictionary2Tree_Combine]: Input file is not exist.");} - } - public void Dictionary2Tree_UniqueGene(String Filename,String StopWords,String Preifx) - { - try - { - //System.out.println("Dictionary2Tree_UniqueGene : " + Filename); - - /** Stop Word */ - BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(StopWords), "UTF-8")); - String line=""; - while ((line = br.readLine()) != null) - { - StopWord_hash.put(line, "StopWord"); - } - br.close(); - - BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(Filename), "UTF-8")); - line=""; - //int count=0; - while ((line = inputfile.readLine()) != null) - { - //count++; - //if(count%10000==0){ System.out.println(count); } - String Column[]=line.split("\t"); - if(Column.length>1) - { - if(!StopWord_hash.containsKey(Column[0].toLowerCase())) - { - if(Preifx.equals("")) - { - Tr.insertMention(Column[0],Column[1]); - } - else if(Preifx.equals("Num") && Column[0].matches("[0-9].*")) - { - Tr.insertMention(Column[0],Column[1]); - } - else if(Preifx.equals("AZNum") && Column[0].matches("[a-z][0-9].*")) - { - Tr.insertMention(Column[0],Column[1]); - } - else if(Preifx.equals("lo") && Column[0].length()>2 && Column[0].substring(0,2).equals(Preifx)) - { - if( ! Column[0].matches("loc[0-9]+")) - { - Tr.insertMention(Column[0],Column[1]); - } - } - else if(Preifx.equals("un") && Column[0].length()>2 && Column[0].substring(0,2).equals(Preifx)) - { - if(Column[0].length()>=6 && Column[0].substring(0,6).equals("unchar")) - { - // remove uncharacterized - } - else - { - Tr.insertMention(Column[0],Column[1]); - } - } - else if(Column[0].length()>2 && Column[0].substring(0,2).equals(Preifx)) - { - Tr.insertMention(Column[0],Column[1]); - } - } - } - } - inputfile.close(); - } - catch(IOException e1){ System.out.println("[Dictionary2Tree_UniqueGene]: Input file is not exist.");} - } - public void Dictionary2Tree_UniqueSpecies(String Filename,String StopWords,String Preifx) - { - try - { - //System.out.println("Dictionary2Tree_UniqueGene : " + Filename); - - /** Stop Word */ - BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(StopWords), "UTF-8")); - String line=""; - while ((line = br.readLine()) != null) - { - StopWord_hash.put(line, "StopWord"); - } - br.close(); - - BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(Filename), "UTF-8")); - line=""; - while ((line = inputfile.readLine()) != null) - { - //count++; - //if(count%10000==0){ System.out.println(count); } - String Column[]=line.split("\t"); - if(Column.length>1) - { - if(!StopWord_hash.containsKey(Column[0].toLowerCase())) - { - if(Preifx.equals("")) //all - { - if(Column[0].matches(".*[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_].*")) - { - String mention_rev=Column[0].replaceAll("[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_]", " "); - String mention_tmp=mention_rev.replaceAll("[\\W\\-\\_]",""); - if(mention_tmp.length()>=10) - { - Tr.insertMention(mention_rev,Column[1]); - } - } - else - { - Tr.insertMention(Column[0],Column[1]); // mention, id - } - - } - else if(Column[0].matches("[0-9][0-9].*")) - { - if(Preifx.equals("Num")) - { - if(Column[0].matches(".*[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_].*")) - { - String mention_rev=Column[0].replaceAll("[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_]", " "); - String mention_tmp=mention_rev.replaceAll("[\\W\\-\\_]",""); - if(mention_tmp.length()>=10) - { - Tr.insertMention(mention_rev,Column[1]); - } - } - else - { - Tr.insertMention(Column[0],Column[1]); // mention, id - } - } - } - /* - else if(Column[0].matches("[a-z][0-9].*")) - { - if(Preifx.equals("AZNum")) - { - if(Column[0].matches(".*[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_].*")) - { - String mention_rev=Column[0].replaceAll("[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_]", " "); - String mention_tmp=mention_rev.replaceAll("[\\W\\-\\_]",""); - if(mention_tmp.length()>=10) - { - Tr.insertMention(mention_rev,Column[1]); - } - } - else - { - Tr.insertMention(Column[0],Column[1]); // mention, id - } - } - } - */ - else if(Column[0].matches("[a-z][a-z].*")) - { - if(Column[0].length()>2 && Column[0].substring(0,2).equals(Preifx)) - { - if(Column[0].matches(".*[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_].*")) - { - String mention_rev=Column[0].replaceAll("[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_]", " "); - String mention_tmp=mention_rev.replaceAll("[\\W\\-\\_]",""); - if(mention_tmp.length()>=10) - { - Tr.insertMention(mention_rev,Column[1]); - } - } - else - { - Tr.insertMention(Column[0],Column[1]); // mention, id - } - } - } - else if(Preifx.equals("Others")) - { - if(Column[0].matches(".*[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_].*")) - { - String mention_rev=Column[0].replaceAll("[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_]", " "); - String mention_tmp=mention_rev.replaceAll("[\\W\\-\\_]",""); - if(mention_tmp.length()>=10) - { - Tr.insertMention(mention_rev,Column[1]); - } - } - else - { - Tr.insertMention(Column[0],Column[1]); // mention, id - } - } - } - } - } - inputfile.close(); - } - catch(IOException e1){ System.out.println("[Dictionary2Tree_UniqueGene]: Input file is not exist.");} - } - public void TreeFile2Tree(String Filename) - { - try - { - //System.out.println("TreeFile2Tree : " + Filename); - - BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(Filename), "UTF-8")); - String line=""; - int count=0; - while ((line = inputfile.readLine()) != null) - { - String Anno[]=line.split("\t"); - if(Anno.length<2){System.out.println(count+"\t"+line);} //check error - String LocationInTree = Anno[0]; - String token = Anno[1]; - String identifier=""; - if(Anno.length==3) - { - identifier = Anno[2]; - } - String LocationsInTree[]=LocationInTree.split("-"); - TreeNode tmp = Tr.root; - for(int i=0;i location = new ArrayList(); - String Menlist[]=Mentions.split("\\|"); - for(int m=0;m=0) //Find Tokens in the links - { - if(i == Tkns.length-1){PrefixTranslation = 1;} - tmp=tmp.links.get(tmp.CheckChild(Tkns[i],PrefixTranslation)); //move point to the link - find=true; - i++; - } - if(find == true) - { - if(i==Tkns.length) - { - if(!tmp.Concept.equals("")) - { - return tmp.Concept; - } - else - { - return "-1"; - //gene id is not found. - } - } - else - { - return "-2"; - //the gene mention matched a substring in PrefixTree. - } - } - else - { - return "-3"; - //mention is not found - } - } - return "-3"; //mention is not found - } - - /* - * Search target mention in the Prefix Tree - */ - public String MentionMatch_species(String Mentions) - { - ArrayList location = new ArrayList(); - String Menlist[]=Mentions.split("\\|"); - for(int m=0;m=0) //Find Tokens in the links - { - if(i == Tkns.length-1){PrefixTranslation = 1;} - tmp=tmp.links.get(tmp.CheckChild(Tkns[i],PrefixTranslation)); //move point to the link - find=true; - i++; - } - if(find == true) - { - if(i==Tkns.length) - { - if(!tmp.Concept.equals("")) - { - return tmp.Concept; - } - else - { - return "-1"; - //gene id is not found. - } - } - else - { - return "-2"; - //the gene mention matched a substring in PrefixTree. - } - } - else - { - return "-3"; - //mention is not found - } - } - return "-3"; //mention is not found - } - - /* - * Search target mention in the Prefix Tree - * ConceptType: Species|Genus|Cell|CTDGene - */ - public ArrayList SearchMentionLocation(String Doc,String ConceptType) - { - ArrayList location = new ArrayList(); - Doc=Doc+" XXXX XXXX"; - String Doc_org=Doc; - Doc=Doc.toLowerCase(); - String Doc_lc=Doc; - Doc = Doc.replaceAll("([0-9])([A-Za-z])", "$1 $2"); - Doc = Doc.replaceAll("([A-Za-z])([0-9])", "$1 $2"); - Doc = Doc.replaceAll("[\\W^;:,]+", " "); - - /* = keep special characters = - * - String regex="\\s+|(?=\\p{Punct})|(?<=\\p{Punct})"; - String DocTkns[]=Doc.split(regex); - */ - - String DocTkns[]=Doc.split(" "); - int Offset=0; - int Start=0; - int Last=0; - int FirstTime=0; - - while(Doc_lc.length()>0 && Doc_lc.substring(0,1).matches("[\\W]")) //clean the forward whitespace - { - Doc_lc=Doc_lc.substring(1); - Offset++; - } - - for(int i=0;i=0 ) //Find Tokens in the links - { - FirstTime_while++; - tmp=tmp.links.get(tmp.CheckChild(DocTkns[i],PrefixTranslation)); //move point to the link - if(Start==0 && FirstTime>0){Start = Offset;} //Start <- Offset - if(Doc_lc.length()>=DocTkns[i].length() && Doc_lc.substring(0,DocTkns[i].length()).equals(DocTkns[i])) - { - if(DocTkns[i].length()>0) - { - Doc_lc=Doc_lc.substring(DocTkns[i].length()); - Offset=Offset+DocTkns[i].length(); - } - } - Last = Offset; - while(Doc_lc.length()>0 && Doc_lc.substring(0,1).matches("[\\W]")) //clean the forward whitespace - { - Doc_lc=Doc_lc.substring(1); - Offset++; - } - i++; - - if(ConceptType.equals("Species")) - { - if(i0 && Doc_lc.substring(0,1).matches("[\\W]")) //clean the forward whitespace - { - Doc_lc=Doc_lc.substring(1); - Offset++; - } - i++; - } - } - - if(!tmp.Concept.equals("") && (Last-Start>0)) //Keep found concept - { - if(Last=DocTkns.length){break;} - else if(i==DocTkns.length-1){PrefixTranslation=2;} - - //System.out.println(i+"\t"+Start+"\t"+Last+"\t("+FirstTime_while+")\t"+Offset+"\t"+Doc_lc); - - if(FirstTime_while==0) // first matched token - { - pre_i=i; - pre_Start=Start; - pre_Last=Last; - pre_Doc_lc=Doc_lc; - pre_Offset=Offset; - } - } - - if(find == true) - { - //System.out.println(find+"\t"+FirstTime_while+"\t"+Start+"\t"+Last+"\t"+Doc_org.substring(Start, Last)+"\t"+tmp.Concept); - if(!tmp.Concept.equals("")) //the last matched token has concept id - { - if(LastStart) - { - location.add(Start+"\t"+Last+"\t"+Doc_org.substring(Start, Last)+"\t"+tmp.Concept); - } - } - else - { - if(!ConceptFound_STR.equals("")) //Keep found concept - { - location.add(ConceptFound_STR); - i = ConceptFound + 1; - } - - if(FirstTime_while>=1) - { - i=pre_i; - Start=pre_Start; - Last=pre_Last; - Doc_lc=pre_Doc_lc; - Offset=pre_Offset; - } - } - Start=0; - Last=0; - if(i>0){i--;} - ConceptFound=i; //Keep found concept - ConceptFound_STR="";//Keep found concept - } - else //if(find == false) - { - //System.out.println(find+"\t"+FirstTime_while+"\t"+Start+"\t"+Last+"\t"+Doc_org.substring(Start, Last)+"\t"+tmp.Concept); - - if(FirstTime_while>=1 && tmp.Concept.equals("")) - { - i=pre_i; - Start=pre_Start; - Last=pre_Last; - Doc_lc=pre_Doc_lc; - Offset=pre_Offset; - } - - if(Doc_lc.length()>=DocTkns[i].length() && Doc_lc.substring(0,DocTkns[i].length()).equals(DocTkns[i])) - { - if(DocTkns[i].length()>0) - { - Doc_lc=Doc_lc.substring(DocTkns[i].length()); - Offset=Offset+DocTkns[i].length(); - } - } - } - - while(Doc_lc.length()>0 && Doc_lc.substring(0,1).matches("[\\W]")) //clean the forward whitespace - { - Doc_lc=Doc_lc.substring(1); - Offset++; - } - FirstTime++; - - //System.out.println(); - } - return location; - } - - /* - * Print out the Prefix Tree - */ - public String PrintTree() - { - return Tr.PrintTree_preorder(Tr.root,""); - } - - public void SaveTree(String outputfile) throws IOException - { - BufferedWriter fr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputfile), "UTF-8")); - Tr.SaveTree_preorder(Tr.root,"",fr); - fr.close(); - } - - - public void insertMention(String Mention, String Identifier) - { - Tr.insertMention(Mention,Identifier); - } -} - -class Tree -{ - /* - * Prefix Tree - root node - */ - public TreeNode root; - - public Tree() - { - root = new TreeNode("-ROOT-"); - } - - /* - * Insert mention into the tree - */ - public void insertMention(String Mention, String Identifier) - { - Mention=Mention.toLowerCase(); - - Mention = Mention.replaceAll("([0-9])([A-Za-z])", "$1 $2"); - Mention = Mention.replaceAll("([A-Za-z])([0-9])", "$1 $2"); - Mention = Mention.replaceAll("[\\W\\-\\_]+", " "); - /* = keep special characters = - * - String regex="\\s+|(?=\\p{Punct})|(?<=\\p{Punct})"; - String Tokens[]=Mention.split(regex); - */ - String Tokens[]=Mention.split(" "); - TreeNode tmp = root; - for(int i=0;i=0) - { - tmp=tmp.links.get( tmp.CheckChild(Tokens[i],0) ); //go through next generation (exist node) - if(i == Tokens.length-1) - { - tmp.Concept=Identifier; - } - } - else //not exist - { - if(i == Tokens.length-1) - { - tmp.InsertToken(Tokens[i],Identifier); - } - else - { - tmp.InsertToken(Tokens[i]); - } - tmp=tmp.links.get(tmp.NumOflinks-1); //go to the next generation (new node) - } - } - } - - /* - * Print the tree by pre-order - */ - public String PrintTree_preorder(TreeNode node, String LocationInTree) - { - String opt=""; - if(!node.token.equals("-ROOT-"))//Ignore root - { - if(node.Concept.equals("")) - { - opt=opt+LocationInTree+"\t"+node.token+"\n"; - } - else - { - opt=opt+LocationInTree+"\t"+node.token+"\t"+node.Concept+"\n"; - } - } - if(!LocationInTree.equals("")){LocationInTree=LocationInTree+"-";} - for(int i=0;i Hashs; - ArrayList links; - - public TreeNode(String Tok,String ID) - { - token = Tok; - NumOflinks = 0; - Concept = ID; - links = new ArrayList();/*link*/ - Hashs = new HashMap();/*hash*/ - } - public TreeNode(String Tok) - { - token = Tok; - NumOflinks = 0; - Concept = ""; - links = new ArrayList();/*link*/ - Hashs = new HashMap();/*hash*/ - } - public TreeNode() - { - token = ""; - NumOflinks = 0; - Concept = ""; - links = new ArrayList();/*link*/ - Hashs = new HashMap();/*hash*/ - } - - public String toString() - { - return (token+"\t"+Concept); - } - - /* - * Insert an new node under the target node - */ - public void InsertToken(String Tok) - { - TreeNode NewNode = new TreeNode(Tok); - - /*link*/ - links.add(NewNode); - - /*hash*/ - Hashs.put(Tok, NumOflinks); - - NumOflinks++; - } - public void InsertToken(String Tok,String ID) - { - TreeNode NewNode = new TreeNode(Tok,ID); - /*link*/ - links.add(NewNode); - - /*hash*/ - Hashs.put(Tok, NumOflinks); - - NumOflinks++; - } - - /* - * Check the tokens of children - */ - public int CheckChild(String Tok, Integer PrefixTranslation) - { - if(Hashs.containsKey(Tok)) - { - return(Hashs.get(Tok)); - } - - if(PrefixTranslation == 1 && Tok.matches("(alpha|beta|gamam|[abg]|[12])")) // SuffixTranslationMap - { - if(Hashs.containsKey(GNormPlus.SuffixTranslationMap_hash.get(Tok))) - { - return(Hashs.get(GNormPlus.SuffixTranslationMap_hash.get(Tok))); - } - - } - else if(PrefixTranslation == 2 && Tok.matches("[1-5]")) // for CTDGene feature - { - for(int i=0;i StopWord_hash = new HashMap(); + + public void Hash2Tree(HashMap ID2Names) + { + for(String ID : ID2Names.keySet()) + { + String NameColumn[]=ID2Names.get(ID).split("\\|"); + for(int i=0;i1) + { + Column[0]=Column[0].replace("species:ncbi:",""); + Column[1]=Column[1].replaceAll(" strain=", " "); + Column[1]=Column[1].replaceAll("[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_]", " "); + Column[1]=Column[1].replaceAll("[\\(\\)]", " "); + String SpNameColumn[]=Column[1].split("\\|"); + for(int i=0;i=3 + ) + { + boolean stopword_boolean=false; + for(String stopword_RegEx : StopWord_hash.keySet()) + { + Pattern ptmp = Pattern.compile("^"+stopword_RegEx+"$"); + Matcher mtmp = ptmp.matcher(SpNameColumn[i].toLowerCase()); + if(mtmp.find()) + { + stopword_boolean=true; + } + } + if(stopword_boolean == false) + { + Tr.insertMention(SpNameColumn[i],Column[0]); + } + } + /* + * Criteria for Gene + */ + else if (MentionType.equals("Gene") && + (!SpNameColumn[i].substring(0, 1).matches("[\\W\\-\\_]")) && + tmp.length()>=3 + ) + { + if(!StopWord_hash.containsKey(SpNameColumn[i].toLowerCase())) + { + Tr.insertMention(SpNameColumn[i],Column[0]); + } + } + /* + * Criteria for Cell + */ + else if (MentionType.equals("Cell") && + (!SpNameColumn[i].substring(0, 1).matches("[\\W\\-\\_]")) && + tmp.length()>=3 + ) + { + if(!StopWord_hash.containsKey(SpNameColumn[i].toLowerCase())) + { + Tr.insertMention(SpNameColumn[i],Column[0]); + } + } + /* + * others + */ + else if ((!SpNameColumn[i].substring(0, 1).matches("[\\W\\-\\_]")) && + tmp.length()>=3 + ) + { + if(!StopWord_hash.containsKey(SpNameColumn[i].toLowerCase())) + { + Tr.insertMention(SpNameColumn[i],Column[0]); + } + } + } + } + } + inputfile.close(); + } + catch(IOException e1){ System.out.println("[Dictionary2Tree_Combine]: Input file is not exist.");} + } + public void Dictionary2Tree_UniqueGene(String Filename,String StopWords,String Preifx) + { + try + { + //System.out.println("Dictionary2Tree_UniqueGene : " + Filename); + + /** Stop Word */ + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(StopWords), "UTF-8")); + String line=""; + while ((line = br.readLine()) != null) + { + StopWord_hash.put(line, "StopWord"); + } + br.close(); + + BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(Filename), "UTF-8")); + line=""; + //int count=0; + while ((line = inputfile.readLine()) != null) + { + //count++; + //if(count%10000==0){ System.out.println(count); } + String Column[]=line.split("\t"); + if(Column.length>1) + { + if(!StopWord_hash.containsKey(Column[0].toLowerCase())) + { + if(Preifx.equals("")) + { + Tr.insertMention(Column[0],Column[1]); + } + else if(Preifx.equals("Num") && Column[0].matches("[0-9].*")) + { + Tr.insertMention(Column[0],Column[1]); + } + else if(Preifx.equals("AZNum") && Column[0].matches("[a-z][0-9].*")) + { + Tr.insertMention(Column[0],Column[1]); + } + else if(Preifx.equals("lo") && Column[0].length()>2 && Column[0].substring(0,2).equals(Preifx)) + { + if( ! Column[0].matches("loc[0-9]+")) + { + Tr.insertMention(Column[0],Column[1]); + } + } + else if(Preifx.equals("un") && Column[0].length()>2 && Column[0].substring(0,2).equals(Preifx)) + { + if(Column[0].length()>=6 && Column[0].substring(0,6).equals("unchar")) + { + // remove uncharacterized + } + else + { + Tr.insertMention(Column[0],Column[1]); + } + } + else if(Column[0].length()>2 && Column[0].substring(0,2).equals(Preifx)) + { + Tr.insertMention(Column[0],Column[1]); + } + } + } + } + inputfile.close(); + } + catch(IOException e1){ System.out.println("[Dictionary2Tree_UniqueGene]: Input file is not exist.");} + } + public void Dictionary2Tree_UniqueSpecies(String Filename,String StopWords,String Preifx) + { + try + { + //System.out.println("Dictionary2Tree_UniqueGene : " + Filename); + + /** Stop Word */ + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(StopWords), "UTF-8")); + String line=""; + while ((line = br.readLine()) != null) + { + StopWord_hash.put(line, "StopWord"); + } + br.close(); + + BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(Filename), "UTF-8")); + line=""; + while ((line = inputfile.readLine()) != null) + { + //count++; + //if(count%10000==0){ System.out.println(count); } + String Column[]=line.split("\t"); + if(Column.length>1) + { + if(!StopWord_hash.containsKey(Column[0].toLowerCase())) + { + if(Preifx.equals("")) //all + { + if(Column[0].matches(".*[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_].*")) + { + String mention_rev=Column[0].replaceAll("[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_]", " "); + String mention_tmp=mention_rev.replaceAll("[\\W\\-\\_]",""); + if(mention_tmp.length()>=10) + { + Tr.insertMention(mention_rev,Column[1]); + } + } + else + { + Tr.insertMention(Column[0],Column[1]); // mention, id + } + + } + else if(Column[0].matches("[0-9][0-9].*")) + { + if(Preifx.equals("Num")) + { + if(Column[0].matches(".*[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_].*")) + { + String mention_rev=Column[0].replaceAll("[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_]", " "); + String mention_tmp=mention_rev.replaceAll("[\\W\\-\\_]",""); + if(mention_tmp.length()>=10) + { + Tr.insertMention(mention_rev,Column[1]); + } + } + else + { + Tr.insertMention(Column[0],Column[1]); // mention, id + } + } + } + /* + else if(Column[0].matches("[a-z][0-9].*")) + { + if(Preifx.equals("AZNum")) + { + if(Column[0].matches(".*[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_].*")) + { + String mention_rev=Column[0].replaceAll("[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_]", " "); + String mention_tmp=mention_rev.replaceAll("[\\W\\-\\_]",""); + if(mention_tmp.length()>=10) + { + Tr.insertMention(mention_rev,Column[1]); + } + } + else + { + Tr.insertMention(Column[0],Column[1]); // mention, id + } + } + } + */ + else if(Column[0].matches("[a-z][a-z].*")) + { + if(Column[0].length()>2 && Column[0].substring(0,2).equals(Preifx)) + { + if(Column[0].matches(".*[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_].*")) + { + String mention_rev=Column[0].replaceAll("[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_]", " "); + String mention_tmp=mention_rev.replaceAll("[\\W\\-\\_]",""); + if(mention_tmp.length()>=10) + { + Tr.insertMention(mention_rev,Column[1]); + } + } + else + { + Tr.insertMention(Column[0],Column[1]); // mention, id + } + } + } + else if(Preifx.equals("Others")) + { + if(Column[0].matches(".*[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_].*")) + { + String mention_rev=Column[0].replaceAll("[\\W\\-\\_](str\\.|strain|substr\\.|substrain|var\\.|variety|variant|subsp\\.|subspecies|pv\\.|pathovars|pathovar|br\\.|biovar)[\\W\\-\\_]", " "); + String mention_tmp=mention_rev.replaceAll("[\\W\\-\\_]",""); + if(mention_tmp.length()>=10) + { + Tr.insertMention(mention_rev,Column[1]); + } + } + else + { + Tr.insertMention(Column[0],Column[1]); // mention, id + } + } + } + } + } + inputfile.close(); + } + catch(IOException e1){ System.out.println("[Dictionary2Tree_UniqueGene]: Input file is not exist.");} + } + public void TreeFile2Tree(String Filename) + { + try + { + //System.out.println("TreeFile2Tree : " + Filename); + + BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(Filename), "UTF-8")); + String line=""; + int count=0; + while ((line = inputfile.readLine()) != null) + { + String Anno[]=line.split("\t"); + if(Anno.length<2){System.out.println(count+"\t"+line);} //check error + String LocationInTree = Anno[0]; + String token = Anno[1]; + String identifier=""; + if(Anno.length==3) + { + identifier = Anno[2]; + } + String LocationsInTree[]=LocationInTree.split("-"); + TreeNode tmp = Tr.root; + for(int i=0;i location = new ArrayList(); + String Menlist[]=Mentions.split("\\|"); + for(int m=0;m=0) //Find Tokens in the links + { + if(i == Tkns.length-1){PrefixTranslation = 1;} + tmp=tmp.links.get(tmp.CheckChild(Tkns[i],PrefixTranslation)); //move point to the link + find=true; + i++; + } + if(find == true) + { + if(i==Tkns.length) + { + if(!tmp.Concept.equals("")) + { + return tmp.Concept; + } + else + { + return "-1"; + //gene id is not found. + } + } + else + { + return "-2"; + //the gene mention matched a substring in PrefixTree. + } + } + else + { + return "-3"; + //mention is not found + } + } + return "-3"; //mention is not found + } + + /* + * Search target mention in the Prefix Tree + */ + public String MentionMatch_species(String Mentions) + { + ArrayList location = new ArrayList(); + String Menlist[]=Mentions.split("\\|"); + for(int m=0;m=0) //Find Tokens in the links + { + if(i == Tkns.length-1){PrefixTranslation = 1;} + tmp=tmp.links.get(tmp.CheckChild(Tkns[i],PrefixTranslation)); //move point to the link + find=true; + i++; + } + if(find == true) + { + if(i==Tkns.length) + { + if(!tmp.Concept.equals("")) + { + return tmp.Concept; + } + else + { + return "-1"; + //gene id is not found. + } + } + else + { + return "-2"; + //the gene mention matched a substring in PrefixTree. + } + } + else + { + return "-3"; + //mention is not found + } + } + return "-3"; //mention is not found + } + + /* + * Search target mention in the Prefix Tree + * ConceptType: Species|Genus|Cell|CTDGene + */ + public ArrayList SearchMentionLocation(String Doc,String ConceptType) + { + ArrayList location = new ArrayList(); + Doc=Doc+" XXXX XXXX"; + String Doc_org=Doc; + Doc=Doc.toLowerCase(); + String Doc_lc=Doc; + Doc = Doc.replaceAll("([0-9])([A-Za-z])", "$1 $2"); + Doc = Doc.replaceAll("([A-Za-z])([0-9])", "$1 $2"); + Doc = Doc.replaceAll("[\\W^;:,]+", " "); + + /* = keep special characters = + * + String regex="\\s+|(?=\\p{Punct})|(?<=\\p{Punct})"; + String DocTkns[]=Doc.split(regex); + */ + + String DocTkns[]=Doc.split(" "); + int Offset=0; + int Start=0; + int Last=0; + int FirstTime=0; + + while(Doc_lc.length()>0 && Doc_lc.substring(0,1).matches("[\\W]")) //clean the forward whitespace + { + Doc_lc=Doc_lc.substring(1); + Offset++; + } + + for(int i=0;i=0 ) //Find Tokens in the links + { + FirstTime_while++; + tmp=tmp.links.get(tmp.CheckChild(DocTkns[i],PrefixTranslation)); //move point to the link + if(Start==0 && FirstTime>0){Start = Offset;} //Start <- Offset + if(Doc_lc.length()>=DocTkns[i].length() && Doc_lc.substring(0,DocTkns[i].length()).equals(DocTkns[i])) + { + if(DocTkns[i].length()>0) + { + Doc_lc=Doc_lc.substring(DocTkns[i].length()); + Offset=Offset+DocTkns[i].length(); + } + } + Last = Offset; + while(Doc_lc.length()>0 && Doc_lc.substring(0,1).matches("[\\W]")) //clean the forward whitespace + { + Doc_lc=Doc_lc.substring(1); + Offset++; + } + i++; + + if(ConceptType.equals("Species")) + { + if(i0 && Doc_lc.substring(0,1).matches("[\\W]")) //clean the forward whitespace + { + Doc_lc=Doc_lc.substring(1); + Offset++; + } + i++; + } + } + + if(!tmp.Concept.equals("") && (Last-Start>0)) //Keep found concept + { + if(Last=DocTkns.length){break;} + else if(i==DocTkns.length-1){PrefixTranslation=2;} + + //System.out.println(i+"\t"+Start+"\t"+Last+"\t("+FirstTime_while+")\t"+Offset+"\t"+Doc_lc); + + if(FirstTime_while==0) // first matched token + { + pre_i=i; + pre_Start=Start; + pre_Last=Last; + pre_Doc_lc=Doc_lc; + pre_Offset=Offset; + } + } + + if(find == true) + { + //System.out.println(find+"\t"+FirstTime_while+"\t"+Start+"\t"+Last+"\t"+Doc_org.substring(Start, Last)+"\t"+tmp.Concept); + if(!tmp.Concept.equals("")) //the last matched token has concept id + { + if(LastStart) + { + location.add(Start+"\t"+Last+"\t"+Doc_org.substring(Start, Last)+"\t"+tmp.Concept); + } + } + else + { + if(!ConceptFound_STR.equals("")) //Keep found concept + { + location.add(ConceptFound_STR); + i = ConceptFound + 1; + } + + if(FirstTime_while>=1) + { + i=pre_i; + Start=pre_Start; + Last=pre_Last; + Doc_lc=pre_Doc_lc; + Offset=pre_Offset; + } + } + Start=0; + Last=0; + if(i>0){i--;} + ConceptFound=i; //Keep found concept + ConceptFound_STR="";//Keep found concept + } + else //if(find == false) + { + //System.out.println(find+"\t"+FirstTime_while+"\t"+Start+"\t"+Last+"\t"+Doc_org.substring(Start, Last)+"\t"+tmp.Concept); + + if(FirstTime_while>=1 && tmp.Concept.equals("")) + { + i=pre_i; + Start=pre_Start; + Last=pre_Last; + Doc_lc=pre_Doc_lc; + Offset=pre_Offset; + } + + if(Doc_lc.length()>=DocTkns[i].length() && Doc_lc.substring(0,DocTkns[i].length()).equals(DocTkns[i])) + { + if(DocTkns[i].length()>0) + { + Doc_lc=Doc_lc.substring(DocTkns[i].length()); + Offset=Offset+DocTkns[i].length(); + } + } + } + + while(Doc_lc.length()>0 && Doc_lc.substring(0,1).matches("[\\W]")) //clean the forward whitespace + { + Doc_lc=Doc_lc.substring(1); + Offset++; + } + FirstTime++; + + //System.out.println(); + } + return location; + } + + /* + * Print out the Prefix Tree + */ + public String PrintTree() + { + return Tr.PrintTree_preorder(Tr.root,""); + } + + public void SaveTree(String outputfile) throws IOException + { + BufferedWriter fr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputfile), "UTF-8")); + Tr.SaveTree_preorder(Tr.root,"",fr); + fr.close(); + } + + + public void insertMention(String Mention, String Identifier) + { + Tr.insertMention(Mention,Identifier); + } +} + +class Tree +{ + /* + * Prefix Tree - root node + */ + public TreeNode root; + + public Tree() + { + root = new TreeNode("-ROOT-"); + } + + /* + * Insert mention into the tree + */ + public void insertMention(String Mention, String Identifier) + { + Mention=Mention.toLowerCase(); + + Mention = Mention.replaceAll("([0-9])([A-Za-z])", "$1 $2"); + Mention = Mention.replaceAll("([A-Za-z])([0-9])", "$1 $2"); + Mention = Mention.replaceAll("[\\W\\-\\_]+", " "); + /* = keep special characters = + * + String regex="\\s+|(?=\\p{Punct})|(?<=\\p{Punct})"; + String Tokens[]=Mention.split(regex); + */ + String Tokens[]=Mention.split(" "); + TreeNode tmp = root; + for(int i=0;i=0) + { + tmp=tmp.links.get( tmp.CheckChild(Tokens[i],0) ); //go through next generation (exist node) + if(i == Tokens.length-1) + { + tmp.Concept=Identifier; + } + } + else //not exist + { + if(i == Tokens.length-1) + { + tmp.InsertToken(Tokens[i],Identifier); + } + else + { + tmp.InsertToken(Tokens[i]); + } + tmp=tmp.links.get(tmp.NumOflinks-1); //go to the next generation (new node) + } + } + } + + /* + * Print the tree by pre-order + */ + public String PrintTree_preorder(TreeNode node, String LocationInTree) + { + String opt=""; + if(!node.token.equals("-ROOT-"))//Ignore root + { + if(node.Concept.equals("")) + { + opt=opt+LocationInTree+"\t"+node.token+"\n"; + } + else + { + opt=opt+LocationInTree+"\t"+node.token+"\t"+node.Concept+"\n"; + } + } + if(!LocationInTree.equals("")){LocationInTree=LocationInTree+"-";} + for(int i=0;i Hashs; + ArrayList links; + + public TreeNode(String Tok,String ID) + { + token = Tok; + NumOflinks = 0; + Concept = ID; + links = new ArrayList();/*link*/ + Hashs = new HashMap();/*hash*/ + } + public TreeNode(String Tok) + { + token = Tok; + NumOflinks = 0; + Concept = ""; + links = new ArrayList();/*link*/ + Hashs = new HashMap();/*hash*/ + } + public TreeNode() + { + token = ""; + NumOflinks = 0; + Concept = ""; + links = new ArrayList();/*link*/ + Hashs = new HashMap();/*hash*/ + } + + public String toString() + { + return (token+"\t"+Concept); + } + + /* + * Insert an new node under the target node + */ + public void InsertToken(String Tok) + { + TreeNode NewNode = new TreeNode(Tok); + + /*link*/ + links.add(NewNode); + + /*hash*/ + Hashs.put(Tok, NumOflinks); + + NumOflinks++; + } + public void InsertToken(String Tok,String ID) + { + TreeNode NewNode = new TreeNode(Tok,ID); + /*link*/ + links.add(NewNode); + + /*hash*/ + Hashs.put(Tok, NumOflinks); + + NumOflinks++; + } + + /* + * Check the tokens of children + */ + public int CheckChild(String Tok, Integer PrefixTranslation) + { + if(Hashs.containsKey(Tok)) + { + return(Hashs.get(Tok)); + } + + if(PrefixTranslation == 1 && Tok.matches("(alpha|beta|gamam|[abg]|[12])")) // SuffixTranslationMap + { + if(Hashs.containsKey(GNormPlus.SuffixTranslationMap_hash.get(Tok))) + { + return(Hashs.get(GNormPlus.SuffixTranslationMap_hash.get(Tok))); + } + + } + else if(PrefixTranslation == 2 && Tok.matches("[1-5]")) // for CTDGene feature + { + for(int i=0;i SPID_hash = new HashMap(); - ArrayList TargetedLocation = new ArrayList(); - HashMap GenusNames = new HashMap(); - HashMap Mention2ID_lc = new HashMap(); - ArrayList IDset = new ArrayList(); - for (int j = 0; j < GNormPlus.BioCDocobj.PassageNames.get(i).size(); j++) /** Paragraphs : j */ - { - String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(i).get(j); // Passage context - - /** Species recognition */ - ArrayList locations = GNormPlus.PT_Species.SearchMentionLocation(PassageContext,"Species"); /** PT_Species */ - for (int k = 0 ; k < locations.size() ; k++) - { - String anno[]=locations.get(k).split("\t"); - int start= Integer.parseInt(anno[0]); - int last= Integer.parseInt(anno[1]); - - // For anti-serum filtering - String ForwardSTR=""; - String BackwardSTR=""; - if(start>21) - { - ForwardSTR = (PassageContext+"ZZZZZZZZZZZZZZZZZZZZZZZZZZZ").substring(start-21,last); - } - else - { - ForwardSTR = (PassageContext+"ZZZZZZZZZZZZZZZZZZZZZZZZZZZ").substring(0,last); - } - if(PassageContext.length()>last+21) - { - BackwardSTR = PassageContext.substring(start,last+21); - } - else - { - BackwardSTR = PassageContext.substring(start,PassageContext.length()); - } - - String mention = anno[2]; - String id = anno[3]; - String mention_tmp=mention.toLowerCase(); - mention_tmp = mention_tmp.replaceAll("([^A-Za-z0-9@ ])", "\\\\$1"); - String antibody=""; - if(ForwardSTR.toLowerCase().matches(".*(anti|antibody|antibodies|serum|polyclonal|monoclonal|igg)[\\W\\-\\_]+"+mention_tmp)) {antibody="(anti)";}//filtering : antibody - else if(BackwardSTR.toLowerCase().matches(mention_tmp+"[\\W\\-\\_]+(anti|antibody|antibodies|serum|polyclonal|monoclonal|igg).*")){antibody="(anti)";} //filtering : antibody - else if(BackwardSTR.toLowerCase().matches(mention_tmp+"[\\W\\-\\_]+[A-Za-z0-9]+[\\W\\-\\_]+(anti|antibody|antibodies|serum|polyclonal|monoclonal|igg).*")){antibody="(anti)";} //filtering : antibody - - if(mention.matches(".*[\\(\\[\\{].*") && BackwardSTR.toLowerCase().matches(mention_tmp+"\\).*") ) - { - last=last+1; - mention=mention+")"; - } - - if(BackwardSTR.toLowerCase().matches(mention_tmp+"[0-9].*")){} // filtered: Bee1p - else if((mention.matches(".*[;:,].*")) && mention.length()<=10){} // filtered : x, XXX - else if(mention.matches("to[\\W\\-\\_]+[0-9]+")){} // to 7 - else if(mention.matches("[a-z][\\)\\]\\}].*") && (!mention.matches(".*[\\(\\[\\{].*")) && mention.length()<=10){} // s). Major - else if(mention.matches(".*[\\(\\[\\{].*") && (!mention.matches(".*[\\)\\]\\}].*")) && mention.length()<=10){} // s). Major - else if(!id.equals("NA")) - { - if(GNormPlus.BioCDocobj.Annotations.size()>i && GNormPlus.BioCDocobj.Annotations.get(i).size()>j) - { - if((!mention.matches("^[A-Za-z] [A-Za-z0-9]+$")) && (mention.length()>=3)) // invalid species: "a group/a GAL4/a strain" - { - if(FilterAntibody.equals("False") || (!antibody.equals("(anti)"))) - { - String patt="^(.+?) [sS]train"; - Pattern ptmp = Pattern.compile(patt); - Matcher mtmp = ptmp.matcher(mention); - if(mtmp.find()) - { - mention=mtmp.group(1); - last=last-7; - } - GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\tSpecies\t"+id); //+antibody - String mentions_tmp=mention.toLowerCase(); - mentions_tmp=mentions_tmp.replaceAll("[\\W\\-\\_]",""); - mentions_tmp=mentions_tmp.replaceAll("[0-9]","0"); - GNormPlus.Filtering_hash.put(mentions_tmp,""); - Mention2ID_lc.put(mention.toLowerCase(), id); //+antibody - - String mention_genus = ""; - patt="^([A-Za-z]+) "; - ptmp = Pattern.compile(patt); - mtmp = ptmp.matcher(mention); - if(mtmp.find()) - { - mention_genus=mtmp.group(1); // get genus - } - - IDset.add(id); - for(int s=start;si && GNormPlus.BioCDocobj.Annotations.get(i).size()>j) - { - if(!TargetedLocation.contains(j+"\t"+start)) //already exists - { - int last40=0; - if(PassageContext.length()>=last+40) - { - last40=last+40; - } - else - { - last40=PassageContext.length(); - } - - // For anti-serum filtering - String ForwardSTR=""; - String BackwardSTR=""; - if(start>21) - { - ForwardSTR = PassageContext.substring(start-21,last); - } - else - { - ForwardSTR = PassageContext.substring(0,last); - } - if(PassageContext.length()>last+21) - { - BackwardSTR = PassageContext.substring(start,last+21); - } - else - { - BackwardSTR = PassageContext.substring(start,PassageContext.length()); - } - String mention_tmp=mention.toLowerCase(); - mention_tmp = mention_tmp.replaceAll("([^A-Za-z0-9@ ])", "\\\\$1"); - if(mention_tmp.matches(".*[\\[\\]\\(\\)\\{\\}].*")){} - else if(BackwardSTR.toLowerCase().matches(mention_tmp+"[0-9\\-\\_].*")){} // filtered: Bee1p - else if(ForwardSTR.toLowerCase().matches(".*[0-9\\-\\_]"+mention_tmp)){} // filtered: IL-22RA1 - else - { - String patt="[\\W\\-]cell([\\- ]*line|)[s]*[\\W\\-]"; - Pattern ptmp = Pattern.compile(patt); - Matcher mtmp = ptmp.matcher(PassageContext.substring(last, last40).toLowerCase()); - if(mtmp.find()) - { - if(GNormPlus.taxid4gene.contains(id)) // for gene - { - id="*"+id; - } - GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\tCell\t"+id); - String mentions_tmp=mention.toLowerCase(); - mentions_tmp=mentions_tmp.replaceAll("[\\W\\-\\_]",""); - mentions_tmp=mentions_tmp.replaceAll("[0-9]","0"); - GNormPlus.Filtering_hash.put(mentions_tmp,""); - IDset.add(id); - for(int s=start;s=7) - { - GenusNames.put(ID,SPID_hash.get(ID)); - } - } - } - - GenusNames.put("3702", "arabidopsis"); - GenusNames.put("4932", "saccharomyces"); - GenusNames.put("562", "escherichia"); - GenusNames.put("7227", "drosophila"); - GenusNames.put("8355", "xenopus"); - - PT_Genus.Hash2Tree(GenusNames); - - /** Genus recognition */ - for (int j = 0; j < GNormPlus.BioCDocobj.PassageNames.get(i).size(); j++) /** Paragraphs : j */ - { - if(GNormPlus.BioCDocobj.PassageContexts.size()>i && - GNormPlus.BioCDocobj.PassageContexts.get(i).size()>j && - GNormPlus.BioCDocobj.Annotations.size()>i && - GNormPlus.BioCDocobj.Annotations.get(i).size()>j - ) - { - String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(i).get(j); - ArrayList locations_Genus = PT_Genus.SearchMentionLocation(PassageContext,"Genus"); /** PT_Genus*/ - for (int k = 0 ; k < locations_Genus.size() ; k++) - { - String anno[]=locations_Genus.get(k).split("\t"); - String start= anno[0]; - String last= anno[1]; - String mention = anno[2]; - String id = anno[3]; - if(!TargetedLocation.contains(j+"\t"+start)) //already exists - { - String patt="^\\**([0-9]+)$"; - Pattern ptmp = Pattern.compile(patt); - Matcher mtmp = ptmp.matcher(id); - if(mtmp.find()) - { - id = mtmp.group(1); - } - - if(GNormPlus.taxid4gene.contains(id)) // for gene - { - id="*"+id; - } - GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\tGenus\t"+id); - String mentions_tmp=mention.toLowerCase(); - mentions_tmp=mentions_tmp.replaceAll("[\\W\\-\\_]",""); - mentions_tmp=mentions_tmp.replaceAll("[0-9]","0"); - GNormPlus.Filtering_hash.put(mentions_tmp,""); - IDset.add(id); - for(int s=Integer.parseInt(start);s StrainID_hash = new HashMap(); - BufferedReader br = new BufferedReader(new FileReader(StrainFilename)); - String line=""; - while ((line = br.readLine()) != null) - { - String l[]=line.split("\t"); - String ancestor = l[0]; - String tax_id = l[1]; - String tax_names = l[2]; - if(SPID_hash.containsKey(ancestor)) - { - StrainID_hash.put(tax_id, tax_names); // tax id -> strain - } - else if(SPID_hash.containsKey(tax_id)) - { - StrainID_hash.put(tax_id, tax_names); // tax id -> strain - } - } - br.close(); - HashMap StrainNames = new HashMap(); - for(String ID: StrainID_hash.keySet()) - { - StrainNames.put(ID,StrainID_hash.get(ID)); - } - - PT_Strain.Hash2Tree(StrainNames); - - /** Strain recognition */ - for (int j = 0; j < GNormPlus.BioCDocobj.PassageNames.get(i).size(); j++) /** Paragraphs : j */ - { - if(GNormPlus.BioCDocobj.PassageContexts.size()>i && - GNormPlus.BioCDocobj.PassageContexts.get(i).size()>j && - GNormPlus.BioCDocobj.Annotations.size()>i && - GNormPlus.BioCDocobj.Annotations.get(i).size()>j - ) - { - String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(i).get(j); // Passage context - ArrayList locations_Strain = PT_Strain.SearchMentionLocation(PassageContext,"Strain"); /** PT_Strain*/ - for (int k = 0 ; k < locations_Strain.size() ; k++) - { - String anno[]=locations_Strain.get(k).split("\t"); - String start= anno[0]; - String last= anno[1]; - String mention = anno[2]; - String id = anno[3]; - if(!TargetedLocation.contains(j+"\t"+start)) //already exists - { - if((!mention.matches(".*[;,\\{\\}\\(\\)\\[\\]].*")) && !mention.matches("[a-z]{1,4} [0-9]{1,3}")) - { - if(GNormPlus.taxid4gene.contains(id)) // for gene - { - id="*"+id; - } - GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\tStrain\t"+id); - String mentions_tmp=mention.toLowerCase(); - mentions_tmp=mentions_tmp.replaceAll("[\\W\\-\\_]",""); - mentions_tmp=mentions_tmp.replaceAll("[0-9]","0"); - GNormPlus.Filtering_hash.put(mentions_tmp,""); - IDset.add(id); - for(int s=Integer.parseInt(start);s OtherNames = new HashMap(); - for(String men : Mention2ID_lc.keySet()) - { - String men_id= Mention2ID_lc.get(men); - if(GNormPlus.PmidLF2Abb_lc_hash.containsKey(Pmid+"\t"+men)) - { - String Abb = GNormPlus.PmidLF2Abb_lc_hash.get(Pmid+"\t"+men); - // Abbreviation - if(OtherNames.containsKey(men_id)) - { - OtherNames.put(men_id, OtherNames.get(men_id)+"|"+Abb); - } - else - { - OtherNames.put(men_id,Abb); - } - } - String men_nospace=men.replaceAll(" ", ""); - // no space - if(OtherNames.containsKey(men_id)) - { - OtherNames.put(men_id, OtherNames.get(men_id)+"|"+men_nospace); - } - else - { - OtherNames.put(men_id,men_nospace); - } - } - PrefixTree PT_Others = new PrefixTree(); - PT_Others.Hash2Tree(OtherNames); - - /** - * - * Others: - * 1) Abbreviation - * 2) no space - * - * */ - for (int j = 0; j < GNormPlus.BioCDocobj.PassageNames.get(i).size(); j++) /** Paragraphs : j */ - { - if(GNormPlus.BioCDocobj.PassageContexts.size()>i && - GNormPlus.BioCDocobj.PassageContexts.get(i).size()>j && - GNormPlus.BioCDocobj.Annotations.size()>i && - GNormPlus.BioCDocobj.Annotations.get(i).size()>j - ) - { - String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(i).get(j); // Passage context - ArrayList locations_Abb = PT_Others.SearchMentionLocation(PassageContext,"Species"); /** PT_Abb*/ - for (int k = 0 ; k < locations_Abb.size() ; k++) - { - String anno[]=locations_Abb.get(k).split("\t"); - String start= anno[0]; - String last= anno[1]; - String mention = anno[2]; - String id = anno[3]; - if(!TargetedLocation.contains(j+"\t"+start)) //already exists - { - if(GNormPlus.taxid4gene.contains(id)) // for gene - { - id="*"+id; - } - GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\tSpecies\t"+id); - String mentions_tmp=mention.toLowerCase(); - mentions_tmp=mentions_tmp.replaceAll("[\\W\\-\\_]",""); - mentions_tmp=mentions_tmp.replaceAll("[0-9]","0"); - GNormPlus.Filtering_hash.put(mentions_tmp,""); - Mention2ID_lc.put(mention.toLowerCase(), id); - IDset.add(id); - for(int s=Integer.parseInt(start);si && GNormPlus.BioCDocobj.PassageContexts.get(i).size()>j && GNormPlus.BioCDocobj.Annotations.size()>i && GNormPlus.BioCDocobj.Annotations.get(i).size()>j) - { - ArrayList remove_anno = new ArrayList (); - for (int a = 0; a < GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); a++) /** Annotations : a */ - { - String SpAnno[]=GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(a).split("\t"); - String start= SpAnno[0]; - String last= SpAnno[1]; - String mention = SpAnno[2]; - String type = SpAnno[3]; - - if(type.matches("Gene|FamilyName")) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(a,start+"\t"+last+"\t"+mention+"\t"+type); - } - else if(type.matches("Species|Genus|Strain|Cell") && SpAnno.length==5) - { - //System.out.println(GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(a)); - /** Abbreviation solution */ - if(GNormPlus.PmidAbb2LF_lc_hash.containsKey(Pmid+"\t"+mention.toLowerCase()) && Mention2ID_lc.containsKey(GNormPlus.PmidAbb2LF_lc_hash.containsKey(Pmid+"\t"+mention.toLowerCase()))) - { - String LF_lc=GNormPlus.PmidAbb2LF_lc_hash.get(Pmid+"\t"+mention.toLowerCase()); - if(Mention2ID_lc.containsKey(LF_lc)) - { - String LF_ID=Mention2ID_lc.get(LF_lc); - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(a, start+"\t"+last+"\t"+mention+"\t"+type+"\t"+LF_ID); - String mentions_tmp=mention.toLowerCase(); - mentions_tmp=mentions_tmp.replaceAll("[\\W\\-\\_]",""); - mentions_tmp=mentions_tmp.replaceAll("[0-9]","0"); - GNormPlus.Filtering_hash.put(mentions_tmp,""); - } - } - else if (SpAnno.length>4) - { - String id = SpAnno[4]; - String id_split[]=id.split(";"); - if(id_split.length>=2) - { - /** Smallest set of tax ids */ - boolean found=false; - for(int x=0;x= 0 ; counter--) - { - int ai=remove_anno.get(counter); - //System.out.println("\n"+ai+"\t"+GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(ai)); - GNormPlus.BioCDocobj.Annotations.get(i).get(j).remove(ai); - } - } - } - } - GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,false,true); //save in BioC file - } - public void SpeciesAssignment(String Filename,String FilenameBioC) throws IOException, XMLStreamException - { - GNormPlus.BioCDocobj.Annotations = new ArrayList(); - GNormPlus.BioCDocobj.BioCReaderWithAnnotation(Filename); - - BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US); - for (int i = 0; i < GNormPlus.BioCDocobj.Annotations.size(); i++) /** PMIDs : i */ - { - HashMap PrefixIDTarget_hash = new HashMap(); - PrefixIDTarget_hash.put("9606", "h"); - PrefixIDTarget_hash.put("10090", "m"); - PrefixIDTarget_hash.put("10116", "r"); - PrefixIDTarget_hash.put("4932", "y"); - PrefixIDTarget_hash.put("7227", "d"); - PrefixIDTarget_hash.put("7955", "z|zf|Zf|dr|Dr"); - PrefixIDTarget_hash.put("3702", "at|At"); - - HashMap SP2Num_hash = new HashMap(); - for (int j = 0; j < GNormPlus.BioCDocobj.Annotations.get(i).size(); j++) /** Paragraphs : j */ - { - for (int k = 0; k < GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); k++) // Annotation : k - { - String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); - if(anno.length==5) //Species - { - String patt="^\\**([0-9]+)$"; - Pattern ptmp = Pattern.compile(patt); - Matcher mtmp = ptmp.matcher(anno[4]); - if(mtmp.find()) - { - String id = mtmp.group(1); - - if(!PrefixIDTarget_hash.containsKey(id)) - { - PrefixIDTarget_hash.put(id,GNormPlus.PrefixID_hash.get(id)); // taxid -> prefix - } - if(j == 0)//title - { - if(SP2Num_hash.containsKey(id)) - { - SP2Num_hash.put(id, SP2Num_hash.get(id)+2); - } - else - { - if(GNormPlus.TaxFreq_hash.containsKey(id)) - { - SP2Num_hash.put(id, GNormPlus.TaxFreq_hash.get(id)+2); - } - else - { - SP2Num_hash.put(id, 2.0); - } - } - // Virus -> Human (not to double weight human to virus) - /*if(GNormPlus.SP_Virus2Human_hash.containsKey(id)) - { - if(SP2Num_hash.containsKey("9606")) - { - SP2Num_hash.put("9606", SP2Num_hash.get("9606")+2); - } - else - { - SP2Num_hash.put("9606", 2 + GNormPlus.TaxFreq_hash.get("9606")+1); - } - }*/ - } - else - { - if(SP2Num_hash.containsKey(id)) - { - SP2Num_hash.put(id, SP2Num_hash.get(id)+1); - } - else - { - if(GNormPlus.TaxFreq_hash.containsKey(id)) - { - SP2Num_hash.put(id, 1 + GNormPlus.TaxFreq_hash.get(id)); - } - else - { - SP2Num_hash.put(id, 1.0); - } - } - // Virus -> Human - /*if(GNormPlus.SP_Virus2Human_hash.containsKey(id)) - { - if(SP2Num_hash.containsKey("9606")) - { - SP2Num_hash.put("9606", SP2Num_hash.get("9606")+1); - } - else - { - SP2Num_hash.put("9606", GNormPlus.TaxFreq_hash.get("9606")+1); - } - }*/ - } - } - } - } - } - String MajorSP="9606"; - double MaxSP=0; - for(String tid : SP2Num_hash.keySet()) - { - if(SP2Num_hash.get(tid)>MaxSP) - { - MajorSP=tid; - MaxSP=SP2Num_hash.get(tid); - } - } - - for (int j = 0; j < GNormPlus.BioCDocobj.PassageContexts.get(i).size(); j++) /** Paragraphs : j */ - { - String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(i).get(j); // Passage context - //int PassageOffset = GNormPlus.BioCDocobj.PassageOffsets.get(i).get(j); // Passage offset - iterator.setText(PassageContext); - ArrayList Sentence_offsets = new ArrayList(); - int Sent_start = iterator.first(); - for (int Sent_last = iterator.next(); Sent_last != BreakIterator.DONE; Sent_start = Sent_last, Sent_last = iterator.next()) - { - Sentence_offsets.add(Sent_start); - } - - HashMap Annotations_Gene_hash = new HashMap(); - ArrayList Annotations_Species = new ArrayList(); - if(GNormPlus.BioCDocobj.Annotations.get(i).size()>j) - { - for (int k = 0; k < GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); k++) // Annotation : k - { - String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); - if(anno.length==5) //Species - { - Annotations_Species.add(GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k)); - } - else //Gene : if(anno.length==3) - { - //String mention = PassageContext.substring(Integer.parseInt(anno[0]), Integer.parseInt(anno[1])); - Annotations_Gene_hash.put(k,GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k)); // k -> Gene Annotation - } - } - - //Gene --> Species Inference (PMID:28777492) - HashMap> mention2Location2Species_hash = new HashMap>(); - HashMap Location2Species_hash = new HashMap(); - for (int k : Annotations_Gene_hash.keySet()) // k is the index of GNormPlus.BioCDocobj.Annotations.get(i).get(j) - { - boolean SPfound = false; - String anno[] = Annotations_Gene_hash.get(k).split("\t"); - int G_Start= Integer.parseInt(anno[0]); - int G_Last= Integer.parseInt(anno[1]); - String G_mentions = anno[2]; - /** - * 2. Co-occurring word - * boundary : - * Sentence Start: Sentence_offsets.get(Target_Sentence) - * Sentence Last: Sentence_offsets.get(Target_Sentence+1) - */ - //Find the target sentence - int Target_Sentence=0; - if(SPfound == false) // 1. left : Closed to start of the gene mention - { - for(int s=0;s Target_Sentence+1){ Sentence_Last = Sentence_offsets.get(Target_Sentence+1); } - if(SPfound == false) // 1. left : Closed to start of the gene mention - { - int closet_Sp_Start=0; - for(int sp=0;sp= Sentence_Start && Sp_Start >closet_Sp_Start) - { - closet_Sp_Start=Sp_Start; - Location2Species_hash.put(Integer.parseInt(anno[0]), taxid); - - if(mention2Location2Species_hash.containsKey(G_mentions.toLowerCase())) - { - mention2Location2Species_hash.get(G_mentions.toLowerCase()).put(Integer.parseInt(anno[0]), taxid); - } - else - { - mention2Location2Species_hash.put(G_mentions.toLowerCase(),Location2Species_hash); - } - - SPfound=true; - } - } - } - } - if(SPfound == false) // 2. right : Closed to last of the gene mention - { - int closet_Sp_Last=1000000; - for(int sp=0;sp= G_Last && Sp_Last <= Sentence_Last && Sp_Last < closet_Sp_Last) - { - closet_Sp_Last=Sp_Last; - Location2Species_hash.put(Integer.parseInt(anno[0]), taxid); - - if(mention2Location2Species_hash.containsKey(G_mentions.toLowerCase())) - { - mention2Location2Species_hash.get(G_mentions.toLowerCase()).put(Integer.parseInt(anno[0]), taxid); - } - else - { - mention2Location2Species_hash.put(G_mentions.toLowerCase(),Location2Species_hash); - } - - SPfound=true; - } - } - } - } - } - - for (int k : Annotations_Gene_hash.keySet()) // k is the index of GNormPlus.BioCDocobj.Annotations.get(i).get(j) - { - String anno[] = Annotations_Gene_hash.get(k).split("\t"); - int G_Start= Integer.parseInt(anno[0]); - int G_Last= Integer.parseInt(anno[1]); - String G_mentions = anno[2]; - String G_type = anno[3]; - String G_mention_list[]=G_mentions.split("\\|"); - String G_mention=G_mention_list[0]; // only use the first term to detect species ; should be updated after SimConcept - - /** 1. prefix */ - boolean SPfound = false; - for(String taxid: PrefixIDTarget_hash.keySet()) - { - if(GNormPlus.GeneWithoutSPPrefix_hash.containsKey(G_mention.toLowerCase())) - { - //special case, and no need for prefix - SA - } - else - { - Pattern ptmp = Pattern.compile("^("+PrefixIDTarget_hash.get(taxid)+")([A-Z].*)$"); - Matcher mtmp = ptmp.matcher(G_mention); - if(mtmp.find()) - { - String MentionWoPrefix=mtmp.group(2); - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, anno[0]+"\t"+anno[1]+"\t"+anno[2]+"|"+MentionWoPrefix+"\t"+anno[3]+"\tPrefix:"+taxid); - SPfound=true; - break; - } - } - } - - /** - * 2. Co-occurring word - * boundary : - * Sentence Start: Sentence_offsets.get(Target_Sentence) - * Sentence Last: Sentence_offsets.get(Target_Sentence+1) - */ - //Find the target sentence - int Target_Sentence=0; - if(SPfound == false) // 1. left : Closed to start of the gene mention - { - for(int s=0;s Target_Sentence+1){ Sentence_Last = Sentence_offsets.get(Target_Sentence+1); } - if(SPfound == false) // 1. left : Closed to start of the gene mention - { - int closet_Sp_Start=0; - for(int sp=0;sp= Sentence_Start && Sp_Start >closet_Sp_Start) - { - closet_Sp_Start=Sp_Start; - if(GNormPlus.SP_Virus2Human_hash.containsKey(taxid)) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tLeft:"+taxid+"&9606"); - } - else - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tLeft:"+taxid); - } - SPfound=true; - } - } - } - } - if(SPfound == false) // 2. right : Closed to last of the gene mention - { - int closet_Sp_Last=1000000; - for(int sp=0;sp= G_Last && Sp_Last <= Sentence_Last && Sp_Last < closet_Sp_Last) - { - closet_Sp_Last=Sp_Last; - if(GNormPlus.SP_Virus2Human_hash.containsKey(taxid)) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tRight:"+taxid+"&9606"); - } - else - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tRight:"+taxid); - } - SPfound=true; - } - } - } - } - - /** 3. Focus species */ - if(SPfound == false) // 2. right : Closed to last of the gene mention - { - // 1. only the mentions appeared earlier are inferred - // - if(mention2Location2Species_hash.containsKey(G_mentions.toLowerCase())) - { - int closed_loca=0; - for (int loca_start : mention2Location2Species_hash.get(G_mentions.toLowerCase()).keySet()) - { - if(loca_startclosed_loca) - { - closed_loca=loca_start; - } - } - } - if(closed_loca>0) - { - if(GNormPlus.SP_Virus2Human_hash.containsKey(Location2Species_hash.get(closed_loca))) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tFocus:"+Location2Species_hash.get(closed_loca)+"&9606"); - } - else - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tFocus:"+Location2Species_hash.get(closed_loca)); - } - } - else - { - if(GNormPlus.SP_Virus2Human_hash.containsKey(MajorSP)) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tFocus:"+MajorSP+"&9606"); - } - else - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tFocus:"+MajorSP); - } - } - } - else - { - if(GNormPlus.SP_Virus2Human_hash.containsKey(MajorSP)) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tFocus:"+MajorSP+"&9606"); - } - else - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tFocus:"+MajorSP); - } - } - } - } - } - } - } - GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,false,true); - } - public void SpeciesAssignment(String Filename,String FilenameBioC,String FocusSpecies) throws IOException, XMLStreamException - { - for (int i = 0; i < GNormPlus.BioCDocobj.Annotations.size(); i++) /** PMIDs : i */ - { - for (int j = 0; j < GNormPlus.BioCDocobj.Annotations.get(i).size(); j++) /** Paragraphs : j */ - { - for (int k = 0; k < GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); k++) // Annotation : k - { - String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); - if(anno.length==5) //Species - { - String id=anno[4].replaceAll("\\*", ""); - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, anno[0]+"\t"+anno[1]+"\t"+anno[2]+"\t"+anno[3]+"\t"+id); - } - else //Gene : if(anno.length==3) - { - /** 1. prefix */ - boolean SPfound = false; - if(GNormPlus.GeneWithoutSPPrefix_hash.containsKey(anno[2].toLowerCase())) - { - //special case, and no need for prefix - SA - } - else - { - Pattern ptmp = Pattern.compile("^("+GNormPlus.PrefixID_hash.get(FocusSpecies)+")([A-Z].*)$"); - Matcher mtmp = ptmp.matcher(anno[2]); - if(mtmp.find()) - { - String MentionWoPrefix=mtmp.group(2); - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, anno[0]+"\t"+anno[1]+"\t"+anno[2]+"|"+MentionWoPrefix+"\t"+anno[3]+"\tPrefix:"+FocusSpecies); - SPfound=true; - } - } - if(SPfound == false) - { - GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k)+"\tFocus:"+FocusSpecies); - } - } - } - } - } - GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,false,true); - } +/** + * Project: GNormPlus + * Function: Species recognition and Species assignment + */ + +package GNormPluslib; + +import bioc.BioCAnnotation; +import bioc.BioCCollection; +import bioc.BioCDocument; +import bioc.BioCLocation; +import bioc.BioCPassage; + +import bioc.io.BioCDocumentWriter; +import bioc.io.BioCFactory; +import bioc.io.woodstox.ConnectorWoodstox; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.text.BreakIterator; +import java.time.LocalDate; +import java.time.ZoneId; + +import javax.xml.stream.XMLStreamException; + +import org.tartarus.snowball.SnowballStemmer; +import org.tartarus.snowball.ext.englishStemmer; + +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Collections; + +public class SR +{ + @SuppressWarnings("null") + public void SpeciesRecognition(String Filename,String FilenameBioC,String StrainFilename,String FilterAntibody) throws IOException, XMLStreamException + { + /** Recognizing Species Names: SP */ + for (int i = 0; i < GNormPlus.BioCDocobj.PMIDs.size(); i++) /** PMIDs : i */ + { + String Pmid = GNormPlus.BioCDocobj.PMIDs.get(i); + PrefixTree PT_Genus = new PrefixTree(); + HashMap SPID_hash = new HashMap(); + ArrayList TargetedLocation = new ArrayList(); + HashMap GenusNames = new HashMap(); + HashMap Mention2ID_lc = new HashMap(); + ArrayList IDset = new ArrayList(); + for (int j = 0; j < GNormPlus.BioCDocobj.PassageNames.get(i).size(); j++) /** Paragraphs : j */ + { + String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(i).get(j); // Passage context + + /** Species recognition */ + ArrayList locations = GNormPlus.PT_Species.SearchMentionLocation(PassageContext,"Species"); /** PT_Species */ + for (int k = 0 ; k < locations.size() ; k++) + { + String anno[]=locations.get(k).split("\t"); + int start= Integer.parseInt(anno[0]); + int last= Integer.parseInt(anno[1]); + + // For anti-serum filtering + String ForwardSTR=""; + String BackwardSTR=""; + if(start>21) + { + ForwardSTR = (PassageContext+"ZZZZZZZZZZZZZZZZZZZZZZZZZZZ").substring(start-21,last); + } + else + { + ForwardSTR = (PassageContext+"ZZZZZZZZZZZZZZZZZZZZZZZZZZZ").substring(0,last); + } + if(PassageContext.length()>last+21) + { + BackwardSTR = PassageContext.substring(start,last+21); + } + else + { + BackwardSTR = PassageContext.substring(start,PassageContext.length()); + } + + String mention = anno[2]; + String id = anno[3]; + String mention_tmp=mention.toLowerCase(); + mention_tmp = mention_tmp.replaceAll("([^A-Za-z0-9@ ])", "\\\\$1"); + String antibody=""; + if(ForwardSTR.toLowerCase().matches(".*(anti|antibody|antibodies|serum|polyclonal|monoclonal|igg)[\\W\\-\\_]+"+mention_tmp)) {antibody="(anti)";}//filtering : antibody + else if(BackwardSTR.toLowerCase().matches(mention_tmp+"[\\W\\-\\_]+(anti|antibody|antibodies|serum|polyclonal|monoclonal|igg).*")){antibody="(anti)";} //filtering : antibody + else if(BackwardSTR.toLowerCase().matches(mention_tmp+"[\\W\\-\\_]+[A-Za-z0-9]+[\\W\\-\\_]+(anti|antibody|antibodies|serum|polyclonal|monoclonal|igg).*")){antibody="(anti)";} //filtering : antibody + + if(mention.matches(".*[\\(\\[\\{].*") && BackwardSTR.toLowerCase().matches(mention_tmp+"\\).*") ) + { + last=last+1; + mention=mention+")"; + } + + if(BackwardSTR.toLowerCase().matches(mention_tmp+"[0-9].*")){} // filtered: Bee1p + else if((mention.matches(".*[;:,].*")) && mention.length()<=10){} // filtered : x, XXX + else if(mention.matches("to[\\W\\-\\_]+[0-9]+")){} // to 7 + else if(mention.matches("[a-z][\\)\\]\\}].*") && (!mention.matches(".*[\\(\\[\\{].*")) && mention.length()<=10){} // s). Major + else if(mention.matches(".*[\\(\\[\\{].*") && (!mention.matches(".*[\\)\\]\\}].*")) && mention.length()<=10){} // s). Major + else if(!id.equals("NA")) + { + if(GNormPlus.BioCDocobj.Annotations.size()>i && GNormPlus.BioCDocobj.Annotations.get(i).size()>j) + { + if((!mention.matches("^[A-Za-z] [A-Za-z0-9]+$")) && (mention.length()>=3)) // invalid species: "a group/a GAL4/a strain" + { + if(FilterAntibody.equals("False") || (!antibody.equals("(anti)"))) + { + String patt="^(.+?) [sS]train"; + Pattern ptmp = Pattern.compile(patt); + Matcher mtmp = ptmp.matcher(mention); + if(mtmp.find()) + { + mention=mtmp.group(1); + last=last-7; + } + GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\tSpecies\t"+id); //+antibody + String mentions_tmp=mention.toLowerCase(); + mentions_tmp=mentions_tmp.replaceAll("[\\W\\-\\_]",""); + mentions_tmp=mentions_tmp.replaceAll("[0-9]","0"); + GNormPlus.Filtering_hash.put(mentions_tmp,""); + Mention2ID_lc.put(mention.toLowerCase(), id); //+antibody + + String mention_genus = ""; + patt="^([A-Za-z]+) "; + ptmp = Pattern.compile(patt); + mtmp = ptmp.matcher(mention); + if(mtmp.find()) + { + mention_genus=mtmp.group(1); // get genus + } + + IDset.add(id); + for(int s=start;si && GNormPlus.BioCDocobj.Annotations.get(i).size()>j) + { + if(!TargetedLocation.contains(j+"\t"+start)) //already exists + { + int last40=0; + if(PassageContext.length()>=last+40) + { + last40=last+40; + } + else + { + last40=PassageContext.length(); + } + + // For anti-serum filtering + String ForwardSTR=""; + String BackwardSTR=""; + if(start>21) + { + ForwardSTR = PassageContext.substring(start-21,last); + } + else + { + ForwardSTR = PassageContext.substring(0,last); + } + if(PassageContext.length()>last+21) + { + BackwardSTR = PassageContext.substring(start,last+21); + } + else + { + BackwardSTR = PassageContext.substring(start,PassageContext.length()); + } + String mention_tmp=mention.toLowerCase(); + mention_tmp = mention_tmp.replaceAll("([^A-Za-z0-9@ ])", "\\\\$1"); + if(mention_tmp.matches(".*[\\[\\]\\(\\)\\{\\}].*")){} + else if(BackwardSTR.toLowerCase().matches(mention_tmp+"[0-9\\-\\_].*")){} // filtered: Bee1p + else if(ForwardSTR.toLowerCase().matches(".*[0-9\\-\\_]"+mention_tmp)){} // filtered: IL-22RA1 + else + { + String patt="[\\W\\-]cell([\\- ]*line|)[s]*[\\W\\-]"; + Pattern ptmp = Pattern.compile(patt); + Matcher mtmp = ptmp.matcher(PassageContext.substring(last, last40).toLowerCase()); + if(mtmp.find()) + { + if(GNormPlus.taxid4gene.contains(id)) // for gene + { + id="*"+id; + } + GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\tCell\t"+id); + String mentions_tmp=mention.toLowerCase(); + mentions_tmp=mentions_tmp.replaceAll("[\\W\\-\\_]",""); + mentions_tmp=mentions_tmp.replaceAll("[0-9]","0"); + GNormPlus.Filtering_hash.put(mentions_tmp,""); + IDset.add(id); + for(int s=start;s=7) + { + GenusNames.put(ID,SPID_hash.get(ID)); + } + } + } + + GenusNames.put("3702", "arabidopsis"); + GenusNames.put("4932", "saccharomyces"); + GenusNames.put("562", "escherichia"); + GenusNames.put("7227", "drosophila"); + GenusNames.put("8355", "xenopus"); + + PT_Genus.Hash2Tree(GenusNames); + + /** Genus recognition */ + for (int j = 0; j < GNormPlus.BioCDocobj.PassageNames.get(i).size(); j++) /** Paragraphs : j */ + { + if(GNormPlus.BioCDocobj.PassageContexts.size()>i && + GNormPlus.BioCDocobj.PassageContexts.get(i).size()>j && + GNormPlus.BioCDocobj.Annotations.size()>i && + GNormPlus.BioCDocobj.Annotations.get(i).size()>j + ) + { + String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(i).get(j); + ArrayList locations_Genus = PT_Genus.SearchMentionLocation(PassageContext,"Genus"); /** PT_Genus*/ + for (int k = 0 ; k < locations_Genus.size() ; k++) + { + String anno[]=locations_Genus.get(k).split("\t"); + String start= anno[0]; + String last= anno[1]; + String mention = anno[2]; + String id = anno[3]; + if(!TargetedLocation.contains(j+"\t"+start)) //already exists + { + String patt="^\\**([0-9]+)$"; + Pattern ptmp = Pattern.compile(patt); + Matcher mtmp = ptmp.matcher(id); + if(mtmp.find()) + { + id = mtmp.group(1); + } + + if(GNormPlus.taxid4gene.contains(id)) // for gene + { + id="*"+id; + } + GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\tGenus\t"+id); + String mentions_tmp=mention.toLowerCase(); + mentions_tmp=mentions_tmp.replaceAll("[\\W\\-\\_]",""); + mentions_tmp=mentions_tmp.replaceAll("[0-9]","0"); + GNormPlus.Filtering_hash.put(mentions_tmp,""); + IDset.add(id); + for(int s=Integer.parseInt(start);s StrainID_hash = new HashMap(); + BufferedReader br = new BufferedReader(new FileReader(StrainFilename)); + String line=""; + while ((line = br.readLine()) != null) + { + String l[]=line.split("\t"); + String ancestor = l[0]; + String tax_id = l[1]; + String tax_names = l[2]; + if(SPID_hash.containsKey(ancestor)) + { + StrainID_hash.put(tax_id, tax_names); // tax id -> strain + } + else if(SPID_hash.containsKey(tax_id)) + { + StrainID_hash.put(tax_id, tax_names); // tax id -> strain + } + } + br.close(); + HashMap StrainNames = new HashMap(); + for(String ID: StrainID_hash.keySet()) + { + StrainNames.put(ID,StrainID_hash.get(ID)); + } + + PT_Strain.Hash2Tree(StrainNames); + + /** Strain recognition */ + for (int j = 0; j < GNormPlus.BioCDocobj.PassageNames.get(i).size(); j++) /** Paragraphs : j */ + { + if(GNormPlus.BioCDocobj.PassageContexts.size()>i && + GNormPlus.BioCDocobj.PassageContexts.get(i).size()>j && + GNormPlus.BioCDocobj.Annotations.size()>i && + GNormPlus.BioCDocobj.Annotations.get(i).size()>j + ) + { + String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(i).get(j); // Passage context + ArrayList locations_Strain = PT_Strain.SearchMentionLocation(PassageContext,"Strain"); /** PT_Strain*/ + for (int k = 0 ; k < locations_Strain.size() ; k++) + { + String anno[]=locations_Strain.get(k).split("\t"); + String start= anno[0]; + String last= anno[1]; + String mention = anno[2]; + String id = anno[3]; + if(!TargetedLocation.contains(j+"\t"+start)) //already exists + { + if((!mention.matches(".*[;,\\{\\}\\(\\)\\[\\]].*")) && !mention.matches("[a-z]{1,4} [0-9]{1,3}")) + { + if(GNormPlus.taxid4gene.contains(id)) // for gene + { + id="*"+id; + } + GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\tStrain\t"+id); + String mentions_tmp=mention.toLowerCase(); + mentions_tmp=mentions_tmp.replaceAll("[\\W\\-\\_]",""); + mentions_tmp=mentions_tmp.replaceAll("[0-9]","0"); + GNormPlus.Filtering_hash.put(mentions_tmp,""); + IDset.add(id); + for(int s=Integer.parseInt(start);s OtherNames = new HashMap(); + for(String men : Mention2ID_lc.keySet()) + { + String men_id= Mention2ID_lc.get(men); + if(GNormPlus.PmidLF2Abb_lc_hash.containsKey(Pmid+"\t"+men)) + { + String Abb = GNormPlus.PmidLF2Abb_lc_hash.get(Pmid+"\t"+men); + // Abbreviation + if(OtherNames.containsKey(men_id)) + { + OtherNames.put(men_id, OtherNames.get(men_id)+"|"+Abb); + } + else + { + OtherNames.put(men_id,Abb); + } + } + String men_nospace=men.replaceAll(" ", ""); + // no space + if(OtherNames.containsKey(men_id)) + { + OtherNames.put(men_id, OtherNames.get(men_id)+"|"+men_nospace); + } + else + { + OtherNames.put(men_id,men_nospace); + } + } + PrefixTree PT_Others = new PrefixTree(); + PT_Others.Hash2Tree(OtherNames); + + /** + * + * Others: + * 1) Abbreviation + * 2) no space + * + * */ + for (int j = 0; j < GNormPlus.BioCDocobj.PassageNames.get(i).size(); j++) /** Paragraphs : j */ + { + if(GNormPlus.BioCDocobj.PassageContexts.size()>i && + GNormPlus.BioCDocobj.PassageContexts.get(i).size()>j && + GNormPlus.BioCDocobj.Annotations.size()>i && + GNormPlus.BioCDocobj.Annotations.get(i).size()>j + ) + { + String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(i).get(j); // Passage context + ArrayList locations_Abb = PT_Others.SearchMentionLocation(PassageContext,"Species"); /** PT_Abb*/ + for (int k = 0 ; k < locations_Abb.size() ; k++) + { + String anno[]=locations_Abb.get(k).split("\t"); + String start= anno[0]; + String last= anno[1]; + String mention = anno[2]; + String id = anno[3]; + if(!TargetedLocation.contains(j+"\t"+start)) //already exists + { + if(GNormPlus.taxid4gene.contains(id)) // for gene + { + id="*"+id; + } + GNormPlus.BioCDocobj.Annotations.get(i).get(j).add(start+"\t"+last+"\t"+mention+"\tSpecies\t"+id); + String mentions_tmp=mention.toLowerCase(); + mentions_tmp=mentions_tmp.replaceAll("[\\W\\-\\_]",""); + mentions_tmp=mentions_tmp.replaceAll("[0-9]","0"); + GNormPlus.Filtering_hash.put(mentions_tmp,""); + Mention2ID_lc.put(mention.toLowerCase(), id); + IDset.add(id); + for(int s=Integer.parseInt(start);si && GNormPlus.BioCDocobj.PassageContexts.get(i).size()>j && GNormPlus.BioCDocobj.Annotations.size()>i && GNormPlus.BioCDocobj.Annotations.get(i).size()>j) + { + ArrayList remove_anno = new ArrayList (); + for (int a = 0; a < GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); a++) /** Annotations : a */ + { + String SpAnno[]=GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(a).split("\t"); + String start= SpAnno[0]; + String last= SpAnno[1]; + String mention = SpAnno[2]; + String type = SpAnno[3]; + + if(type.matches("Gene|FamilyName")) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(a,start+"\t"+last+"\t"+mention+"\t"+type); + } + else if(type.matches("Species|Genus|Strain|Cell") && SpAnno.length==5) + { + //System.out.println(GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(a)); + /** Abbreviation solution */ + if(GNormPlus.PmidAbb2LF_lc_hash.containsKey(Pmid+"\t"+mention.toLowerCase()) && Mention2ID_lc.containsKey(GNormPlus.PmidAbb2LF_lc_hash.containsKey(Pmid+"\t"+mention.toLowerCase()))) + { + String LF_lc=GNormPlus.PmidAbb2LF_lc_hash.get(Pmid+"\t"+mention.toLowerCase()); + if(Mention2ID_lc.containsKey(LF_lc)) + { + String LF_ID=Mention2ID_lc.get(LF_lc); + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(a, start+"\t"+last+"\t"+mention+"\t"+type+"\t"+LF_ID); + String mentions_tmp=mention.toLowerCase(); + mentions_tmp=mentions_tmp.replaceAll("[\\W\\-\\_]",""); + mentions_tmp=mentions_tmp.replaceAll("[0-9]","0"); + GNormPlus.Filtering_hash.put(mentions_tmp,""); + } + } + else if (SpAnno.length>4) + { + String id = SpAnno[4]; + String id_split[]=id.split(";"); + if(id_split.length>=2) + { + /** Smallest set of tax ids */ + boolean found=false; + for(int x=0;x= 0 ; counter--) + { + int ai=remove_anno.get(counter); + //System.out.println("\n"+ai+"\t"+GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(ai)); + GNormPlus.BioCDocobj.Annotations.get(i).get(j).remove(ai); + } + } + } + } + GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,false,true); //save in BioC file + } + public void SpeciesAssignment(String Filename,String FilenameBioC) throws IOException, XMLStreamException + { + GNormPlus.BioCDocobj.Annotations = new ArrayList(); + GNormPlus.BioCDocobj.BioCReaderWithAnnotation(Filename); + + BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US); + for (int i = 0; i < GNormPlus.BioCDocobj.Annotations.size(); i++) /** PMIDs : i */ + { + HashMap PrefixIDTarget_hash = new HashMap(); + PrefixIDTarget_hash.put("9606", "h"); + PrefixIDTarget_hash.put("10090", "m"); + PrefixIDTarget_hash.put("10116", "r"); + PrefixIDTarget_hash.put("4932", "y"); + PrefixIDTarget_hash.put("7227", "d"); + PrefixIDTarget_hash.put("7955", "z|zf|Zf|dr|Dr"); + PrefixIDTarget_hash.put("3702", "at|At"); + + HashMap SP2Num_hash = new HashMap(); + for (int j = 0; j < GNormPlus.BioCDocobj.Annotations.get(i).size(); j++) /** Paragraphs : j */ + { + for (int k = 0; k < GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); k++) // Annotation : k + { + String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); + if(anno.length==5) //Species + { + String patt="^\\**([0-9]+)$"; + Pattern ptmp = Pattern.compile(patt); + Matcher mtmp = ptmp.matcher(anno[4]); + if(mtmp.find()) + { + String id = mtmp.group(1); + + if(!PrefixIDTarget_hash.containsKey(id)) + { + PrefixIDTarget_hash.put(id,GNormPlus.PrefixID_hash.get(id)); // taxid -> prefix + } + if(j == 0)//title + { + if(SP2Num_hash.containsKey(id)) + { + SP2Num_hash.put(id, SP2Num_hash.get(id)+2); + } + else + { + if(GNormPlus.TaxFreq_hash.containsKey(id)) + { + SP2Num_hash.put(id, GNormPlus.TaxFreq_hash.get(id)+2); + } + else + { + SP2Num_hash.put(id, 2.0); + } + } + // Virus -> Human (not to double weight human to virus) + /*if(GNormPlus.SP_Virus2Human_hash.containsKey(id)) + { + if(SP2Num_hash.containsKey("9606")) + { + SP2Num_hash.put("9606", SP2Num_hash.get("9606")+2); + } + else + { + SP2Num_hash.put("9606", 2 + GNormPlus.TaxFreq_hash.get("9606")+1); + } + }*/ + } + else + { + if(SP2Num_hash.containsKey(id)) + { + SP2Num_hash.put(id, SP2Num_hash.get(id)+1); + } + else + { + if(GNormPlus.TaxFreq_hash.containsKey(id)) + { + SP2Num_hash.put(id, 1 + GNormPlus.TaxFreq_hash.get(id)); + } + else + { + SP2Num_hash.put(id, 1.0); + } + } + // Virus -> Human + /*if(GNormPlus.SP_Virus2Human_hash.containsKey(id)) + { + if(SP2Num_hash.containsKey("9606")) + { + SP2Num_hash.put("9606", SP2Num_hash.get("9606")+1); + } + else + { + SP2Num_hash.put("9606", GNormPlus.TaxFreq_hash.get("9606")+1); + } + }*/ + } + } + } + } + } + String MajorSP="9606"; + double MaxSP=0; + for(String tid : SP2Num_hash.keySet()) + { + if(SP2Num_hash.get(tid)>MaxSP) + { + MajorSP=tid; + MaxSP=SP2Num_hash.get(tid); + } + } + + for (int j = 0; j < GNormPlus.BioCDocobj.PassageContexts.get(i).size(); j++) /** Paragraphs : j */ + { + String PassageContext = GNormPlus.BioCDocobj.PassageContexts.get(i).get(j); // Passage context + //int PassageOffset = GNormPlus.BioCDocobj.PassageOffsets.get(i).get(j); // Passage offset + iterator.setText(PassageContext); + ArrayList Sentence_offsets = new ArrayList(); + int Sent_start = iterator.first(); + for (int Sent_last = iterator.next(); Sent_last != BreakIterator.DONE; Sent_start = Sent_last, Sent_last = iterator.next()) + { + Sentence_offsets.add(Sent_start); + } + + HashMap Annotations_Gene_hash = new HashMap(); + ArrayList Annotations_Species = new ArrayList(); + if(GNormPlus.BioCDocobj.Annotations.get(i).size()>j) + { + for (int k = 0; k < GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); k++) // Annotation : k + { + String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); + if(anno.length==5) //Species + { + Annotations_Species.add(GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k)); + } + else //Gene : if(anno.length==3) + { + //String mention = PassageContext.substring(Integer.parseInt(anno[0]), Integer.parseInt(anno[1])); + Annotations_Gene_hash.put(k,GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k)); // k -> Gene Annotation + } + } + + //Gene --> Species Inference (PMID:28777492) + HashMap> mention2Location2Species_hash = new HashMap>(); + HashMap Location2Species_hash = new HashMap(); + for (int k : Annotations_Gene_hash.keySet()) // k is the index of GNormPlus.BioCDocobj.Annotations.get(i).get(j) + { + boolean SPfound = false; + String anno[] = Annotations_Gene_hash.get(k).split("\t"); + int G_Start= Integer.parseInt(anno[0]); + int G_Last= Integer.parseInt(anno[1]); + String G_mentions = anno[2]; + /** + * 2. Co-occurring word + * boundary : + * Sentence Start: Sentence_offsets.get(Target_Sentence) + * Sentence Last: Sentence_offsets.get(Target_Sentence+1) + */ + //Find the target sentence + int Target_Sentence=0; + if(SPfound == false) // 1. left : Closed to start of the gene mention + { + for(int s=0;s Target_Sentence+1){ Sentence_Last = Sentence_offsets.get(Target_Sentence+1); } + if(SPfound == false) // 1. left : Closed to start of the gene mention + { + int closet_Sp_Start=0; + for(int sp=0;sp= Sentence_Start && Sp_Start >closet_Sp_Start) + { + closet_Sp_Start=Sp_Start; + Location2Species_hash.put(Integer.parseInt(anno[0]), taxid); + + if(mention2Location2Species_hash.containsKey(G_mentions.toLowerCase())) + { + mention2Location2Species_hash.get(G_mentions.toLowerCase()).put(Integer.parseInt(anno[0]), taxid); + } + else + { + mention2Location2Species_hash.put(G_mentions.toLowerCase(),Location2Species_hash); + } + + SPfound=true; + } + } + } + } + if(SPfound == false) // 2. right : Closed to last of the gene mention + { + int closet_Sp_Last=1000000; + for(int sp=0;sp= G_Last && Sp_Last <= Sentence_Last && Sp_Last < closet_Sp_Last) + { + closet_Sp_Last=Sp_Last; + Location2Species_hash.put(Integer.parseInt(anno[0]), taxid); + + if(mention2Location2Species_hash.containsKey(G_mentions.toLowerCase())) + { + mention2Location2Species_hash.get(G_mentions.toLowerCase()).put(Integer.parseInt(anno[0]), taxid); + } + else + { + mention2Location2Species_hash.put(G_mentions.toLowerCase(),Location2Species_hash); + } + + SPfound=true; + } + } + } + } + } + + for (int k : Annotations_Gene_hash.keySet()) // k is the index of GNormPlus.BioCDocobj.Annotations.get(i).get(j) + { + String anno[] = Annotations_Gene_hash.get(k).split("\t"); + int G_Start= Integer.parseInt(anno[0]); + int G_Last= Integer.parseInt(anno[1]); + String G_mentions = anno[2]; + String G_type = anno[3]; + String G_mention_list[]=G_mentions.split("\\|"); + String G_mention=G_mention_list[0]; // only use the first term to detect species ; should be updated after SimConcept + + /** 1. prefix */ + boolean SPfound = false; + for(String taxid: PrefixIDTarget_hash.keySet()) + { + if(GNormPlus.GeneWithoutSPPrefix_hash.containsKey(G_mention.toLowerCase())) + { + //special case, and no need for prefix - SA + } + else + { + Pattern ptmp = Pattern.compile("^("+PrefixIDTarget_hash.get(taxid)+")([A-Z].*)$"); + Matcher mtmp = ptmp.matcher(G_mention); + if(mtmp.find()) + { + String MentionWoPrefix=mtmp.group(2); + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, anno[0]+"\t"+anno[1]+"\t"+anno[2]+"|"+MentionWoPrefix+"\t"+anno[3]+"\tPrefix:"+taxid); + SPfound=true; + break; + } + } + } + + /** + * 2. Co-occurring word + * boundary : + * Sentence Start: Sentence_offsets.get(Target_Sentence) + * Sentence Last: Sentence_offsets.get(Target_Sentence+1) + */ + //Find the target sentence + int Target_Sentence=0; + if(SPfound == false) // 1. left : Closed to start of the gene mention + { + for(int s=0;s Target_Sentence+1){ Sentence_Last = Sentence_offsets.get(Target_Sentence+1); } + if(SPfound == false) // 1. left : Closed to start of the gene mention + { + int closet_Sp_Start=0; + for(int sp=0;sp= Sentence_Start && Sp_Start >closet_Sp_Start) + { + closet_Sp_Start=Sp_Start; + if(GNormPlus.SP_Virus2Human_hash.containsKey(taxid)) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tLeft:"+taxid+"&9606"); + } + else + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tLeft:"+taxid); + } + SPfound=true; + } + } + } + } + if(SPfound == false) // 2. right : Closed to last of the gene mention + { + int closet_Sp_Last=1000000; + for(int sp=0;sp= G_Last && Sp_Last <= Sentence_Last && Sp_Last < closet_Sp_Last) + { + closet_Sp_Last=Sp_Last; + if(GNormPlus.SP_Virus2Human_hash.containsKey(taxid)) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tRight:"+taxid+"&9606"); + } + else + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tRight:"+taxid); + } + SPfound=true; + } + } + } + } + + /** 3. Focus species */ + if(SPfound == false) // 2. right : Closed to last of the gene mention + { + // 1. only the mentions appeared earlier are inferred + // + if(mention2Location2Species_hash.containsKey(G_mentions.toLowerCase())) + { + int closed_loca=0; + for (int loca_start : mention2Location2Species_hash.get(G_mentions.toLowerCase()).keySet()) + { + if(loca_startclosed_loca) + { + closed_loca=loca_start; + } + } + } + if(closed_loca>0) + { + if(GNormPlus.SP_Virus2Human_hash.containsKey(Location2Species_hash.get(closed_loca))) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tFocus:"+Location2Species_hash.get(closed_loca)+"&9606"); + } + else + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tFocus:"+Location2Species_hash.get(closed_loca)); + } + } + else + { + if(GNormPlus.SP_Virus2Human_hash.containsKey(MajorSP)) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tFocus:"+MajorSP+"&9606"); + } + else + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tFocus:"+MajorSP); + } + } + } + else + { + if(GNormPlus.SP_Virus2Human_hash.containsKey(MajorSP)) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tFocus:"+MajorSP+"&9606"); + } + else + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, Annotations_Gene_hash.get(k)+"\tFocus:"+MajorSP); + } + } + } + } + } + } + } + GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,false,true); + } + public void SpeciesAssignment(String Filename,String FilenameBioC,String FocusSpecies) throws IOException, XMLStreamException + { + for (int i = 0; i < GNormPlus.BioCDocobj.Annotations.size(); i++) /** PMIDs : i */ + { + for (int j = 0; j < GNormPlus.BioCDocobj.Annotations.get(i).size(); j++) /** Paragraphs : j */ + { + for (int k = 0; k < GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); k++) // Annotation : k + { + String anno[] = GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\t"); + if(anno.length==5) //Species + { + String id=anno[4].replaceAll("\\*", ""); + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, anno[0]+"\t"+anno[1]+"\t"+anno[2]+"\t"+anno[3]+"\t"+id); + } + else //Gene : if(anno.length==3) + { + /** 1. prefix */ + boolean SPfound = false; + if(GNormPlus.GeneWithoutSPPrefix_hash.containsKey(anno[2].toLowerCase())) + { + //special case, and no need for prefix - SA + } + else + { + Pattern ptmp = Pattern.compile("^("+GNormPlus.PrefixID_hash.get(FocusSpecies)+")([A-Z].*)$"); + Matcher mtmp = ptmp.matcher(anno[2]); + if(mtmp.find()) + { + String MentionWoPrefix=mtmp.group(2); + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, anno[0]+"\t"+anno[1]+"\t"+anno[2]+"|"+MentionWoPrefix+"\t"+anno[3]+"\tPrefix:"+FocusSpecies); + SPfound=true; + } + } + if(SPfound == false) + { + GNormPlus.BioCDocobj.Annotations.get(i).get(j).set(k, GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k)+"\tFocus:"+FocusSpecies); + } + } + } + } + } + GNormPlus.BioCDocobj.BioCOutput(Filename,FilenameBioC,GNormPlus.BioCDocobj.Annotations,false,true); + } } \ No newline at end of file diff --git a/src_Java/GNormPluslib/SimConcept.java b/src_Java/GNormPluslib/SimConcept.java index 4cc07c6a363437c83bc9b4b9c568b6ebd194fd31..0bf99b103811378a7fcfe4f459792973cd884673 100644 --- a/src_Java/GNormPluslib/SimConcept.java +++ b/src_Java/GNormPluslib/SimConcept.java @@ -1,1524 +1,1524 @@ -/** - * Project: GNormPlus - * Function: SimConcept : Simplify Composite mentions - */ - -package GNormPluslib; - -import bioc.BioCAnnotation; -import bioc.BioCCollection; -import bioc.BioCDocument; -import bioc.BioCLocation; -import bioc.BioCPassage; - -import bioc.io.BioCDocumentWriter; -import bioc.io.BioCFactory; -import bioc.io.woodstox.ConnectorWoodstox; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.text.BreakIterator; -import java.time.LocalDate; -import java.time.ZoneId; -import java.text.DecimalFormat; -import java.math.RoundingMode; - -import javax.xml.stream.XMLStreamException; - -import org.tartarus.snowball.SnowballStemmer; -import org.tartarus.snowball.ext.englishStemmer; - -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; - -public class SimConcept -{ - /* - * Feature Extraction - */ - public void FeatureExtraction_Train(String FilenameData) throws XMLStreamException - { - try - { - /** output files */ - BufferedWriter FileData = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(FilenameData), "UTF-8")); // .data - //NLP modules - SnowballStemmer stemmer = new englishStemmer(); - /** PMIDs : i */ - for (int i = 0; i < GNormPlus.BioCDocobj.PMIDs.size(); i++) - { - String Pmid = GNormPlus.BioCDocobj.PMIDs.get(i); - - /** Paragraphs : j */ - for (int j = 0; j < GNormPlus.BioCDocobj.PassageNames.get(i).size(); j++) - { - ArrayList Annotation = GNormPlus.BioCDocobj.Annotations.get(i).get(j); - /** Annotations : k - * 0 start - * 1 last - * 2 mention - * 3 type - * 4 id - */ - int Inital_Annotation_size=Annotation.size(); - for (int k = 0; k < Annotation.size() ; k++) // k : Annotations - { - String anno[]=Annotation.get(k).split("\\t",-1); - int MentionStart= Integer.parseInt(anno[0]); - int MentionLast= Integer.parseInt(anno[1]); - String Mention = anno[2]; - String Type = anno[3]; - if(anno.length>4) - { - String ID = anno[4]; - - String TokenSTR = Mention; - TokenSTR = TokenSTR.replaceAll("([0-9])([A-Za-z])", "$1 $2"); - TokenSTR = TokenSTR.replaceAll("([A-Za-z])([0-9])", "$1 $2"); - TokenSTR = TokenSTR.replaceAll("([A-Z])([a-z])", "$1 $2"); - TokenSTR = TokenSTR.replaceAll("([a-z])([A-Z])", "$1 $2"); - TokenSTR = TokenSTR.replaceAll("([\\W])", " $1 "); - TokenSTR = TokenSTR.replaceAll("[ ]+", " "); - TokenSTR = TokenSTR.replaceAll("^[ ]+", ""); - TokenSTR = TokenSTR.replaceAll("[ ]+$", ""); - - /* - * Only for Gene - */ - if(ID.equals("ASJAS") && kInteger.parseInt(t2)) - { - tmp_ment=t1+" "+t2+" to "+t5; - Annotation.add(MentionStart+"\t"+MentionLast+"\t"+tmp_ment+"\t"+Type+"\tASNS"); - tmp_ment=t1+" "+t2+" to -"+t5; - Annotation.add(MentionStart+"\t"+MentionLast+"\t"+tmp_ment+"\t"+Type+"\tASNOS"); - tmp_ment=t1+" -"+t2+" to -"+t5; - Annotation.add(MentionStart+"\t"+MentionLast+"\t"+tmp_ment+"\t"+Type+"\tAASNOS"); - tmp_ment=t1+" "+t2+" to "+t1+" "+t5; - Annotation.add(MentionStart+"\t"+MentionLast+"\t"+tmp_ment+"\t"+Type+"\tASNAS"); - tmp_ment=t1+" "+t2+"-"+t5; - Annotation.add(MentionStart+"\t"+MentionLast+"\t"+tmp_ment+"\t"+Type+"\tASNS"); - tmp_ment=t1+" "+t2+", "+t5+", and "+(Integer.parseInt(t5)+2); - Annotation.add(MentionStart+"\t"+MentionLast+"\t"+tmp_ment+"\t"+Type+"\tASCSCCS"); - tmp_ment=t1+" -"+t2+", -"+t5+", and -"+(Integer.parseInt(t5)+2); - Annotation.add(MentionStart+"\t"+MentionLast+"\t"+tmp_ment+"\t"+Type+"\tAASC0SCC0S"); - } - } - } - - String Mention_tmp = Mention; - String tokens[]=TokenSTR.split(" ",-1); - - //For Repeat - HashMap Token2Num = new HashMap (); - for(int p=0;p AbbLFStatus_hash = new HashMap (); - for(String Pmid_LF : GNormPlus.PmidLF2Abb_hash.keySet()) - { - String pf[] = Pmid_LF.split("\\t",-1); - if(pf[0].equals(Pmid)) - { - String Abb = GNormPlus.PmidLF2Abb_hash.get(Pmid_LF); - String LF = pf[1]; - - Abb = Abb.replaceAll("([0-9])([A-Za-z])", "$1 $2"); - Abb = Abb.replaceAll("([A-Za-z])([0-9])", "$1 $2"); - Abb = Abb.replaceAll("([A-Z])([a-z])", "$1 $2"); - Abb = Abb.replaceAll("([a-z])([A-Z])", "$1 $2"); - Abb = Abb.replaceAll("([\\W])", " $1 "); - Abb = Abb.replaceAll("[ ]+", " "); - Abb = Abb.replaceAll("^[ ]+", ""); - - LF = LF.replaceAll("([0-9])([A-Za-z])", "$1 $2"); - LF = LF.replaceAll("([A-Za-z])([0-9])", "$1 $2"); - LF = LF.replaceAll("([A-Z])([a-z])", "$1 $2"); - LF = LF.replaceAll("([a-z])([A-Z])", "$1 $2"); - LF = LF.replaceAll("([\\W])", " $1 "); - LF = LF.replaceAll("[ ]+", " "); - LF = LF.replaceAll("^[ ]+", ""); - LF = LF.replaceAll("[ ]+$", ""); - - - Abb=Abb.replaceAll("([^A-Za-z0-9@ ])","\\\\$1"); - LF=LF.replaceAll("([^A-Za-z0-9@ ])","\\\\$1"); - Abb=Abb.toLowerCase(); - LF=LF.toLowerCase(); - Pattern ptmp1 = Pattern.compile("(.*)("+LF+")([ ]*\\([ ]*)("+Abb+")[ ]*\\).*"); - Matcher mtmp1 = ptmp1.matcher(TokenSTR.toLowerCase()); - Pattern ptmp2 = Pattern.compile("(.*)("+Abb+")([ ]*\\([ ]*)("+LF+")[ ]*\\).*"); - Matcher mtmp2 = ptmp2.matcher(TokenSTR.toLowerCase()); - int start_LF=0; - int last_LF=0; - int start_Abb=0; - int last_Abb=0; - if(mtmp1.find()) - { - start_LF = mtmp1.group(1).length(); - last_LF = start_LF+mtmp1.group(2).length(); - start_Abb = last_LF+mtmp1.group(3).length(); - last_Abb = start_Abb+mtmp1.group(4).length(); - } - else if(mtmp2.find()) - { - start_Abb = mtmp2.group(1).length(); - last_Abb = start_LF+mtmp2.group(2).length(); - start_LF = last_LF+mtmp2.group(3).length(); - last_LF = start_Abb+mtmp2.group(4).length(); - } - for(int l=start_LF;l0) - { - String B=tokens[p-1]; - B=B.replaceAll("[A-Za-z]+", "A"); - B=B.replaceAll("[0-9]+", "0"); - WSB="WSB:"+B; - } - if(p3){Num_num="N:4+";}else{Num_num="N:"+ tmp.length();} - - //Number of Uppercase [A-Z] - String Num_Uc=""; - tmp=tokens[p]; - tmp=tmp.replaceAll("[^A-Z]",""); - if(tmp.length()>3){Num_Uc="U:4+";}else{Num_Uc="U:"+ tmp.length();} - - //Number of Lowercase [a-z] - String Num_lc=""; - tmp=tokens[p]; - tmp=tmp.replaceAll("[^a-z]",""); - if(tmp.length()>3){Num_lc="L:4+";}else{Num_lc="L:"+ tmp.length();} - - //Number of ALL char - String Num_All=""; - if(tokens[p].length()>3){Num_All="A:4+";}else{Num_All="A:"+ tokens[p].length();} - - //specific character (;:,.->+_) - String SpecificC="__nil__"; - if(tokens[p].equals(";") || tokens[p].equals(":") || tokens[p].equals(",") || tokens[p].equals(".") || tokens[p].equals("-") || tokens[p].equals(">") || tokens[p].equals("+") || tokens[p].equals("_")) - { - SpecificC="-SpecificC1-"; - } - else if(tokens[p].equals("(") || tokens[p].equals(")")) - { - SpecificC="-SpecificC2-"; - } - else if(tokens[p].equals("{") || tokens[p].equals("}")) - { - SpecificC="-SpecificC3-"; - } - else if(tokens[p].equals("[") || tokens[p].equals("]")) - { - SpecificC="-SpecificC4-"; - } - else if(tokens[p].equals("\\") || tokens[p].equals("/")) - { - SpecificC="-SpecificC5-"; - } - - //Chemical Prefix/Suffix - String ChemPreSuf="__nil__"; - if(tokens[p].matches(".*(yl|ylidyne|oyl|sulfonyl)")){ChemPreSuf="-CHEMinlineSuffix-";} - else if(tokens[p].matches("(meth|eth|prop|tetracos).*")){ChemPreSuf="-CHEMalkaneStem-";} - else if(tokens[p].matches("(di|tri|tetra).*")){ChemPreSuf="-CHEMsimpleMultiplier-";} - else if(tokens[p].matches("(benzen|pyridin|toluen).*")){ChemPreSuf="-CHEMtrivialRing-";} - else if(tokens[p].matches(".*(one|ol|carboxylic|amide|ate|acid|ium|ylium|ide|uide|iran|olan|inan|pyrid|acrid|amid|keten|formazan|fydrazin)(s|)")){ChemPreSuf="-CHEMsuffix-";} - - //MentionType - String MentionType="__nil__"; - if(GNormPlus.SimConceptMention2Type_hash.containsKey(tokens[p])) - { - MentionType = "-"+GNormPlus.SimConceptMention2Type_hash.get(tokens[p])+"-"; - } - - //Protein symbols - String ProteinSym="__nil__"; - if(tokens[p].matches(".*(glutamine|glutamic|leucine|valine|isoleucine|lysine|alanine|glycine|aspartate|methionine|threonine|histidine|aspartic|asparticacid|arginine|asparagine|tryptophan|proline|phenylalanine|cysteine|serine|glutamate|tyrosine|stop|frameshift).*")){ChemPreSuf="-ProteinSymFull-";} - else if(tokens[p].matches("(cys|ile|ser|gln|met|asn|pro|lys|asp|thr|phe|ala|gly|his|leu|arg|trp|val|glu|tyr|fs|fsx)")){ChemPreSuf="-ProteinSymTri-";} - else if(tokens[p].matches("[CISQMNPKDTFAGHLRWVEYX]")){ChemPreSuf="-ProteinSymChar-";} - - //Repeat - String Repeat="__nil__"; - if(Token2Num.get(tokens[p])>1 && tokens[p].length()>1 && (!tokens[p].matches("([\\W\\-\\_0-9]+|and|or|alpha|beta|gamma|theta|zeta|delta|kappa|II|VI|IV|III)"))) - { - Repeat="-Repeat-"; - } - - //Patterns - String Pattern1 = tokens[p]; - if(Pattern1.matches(".*[\\W\\-\\_].*")) - { - Pattern1="__nil__"; - } - else - { - Pattern1=Pattern1.replaceAll("[A-Z]", "A"); - Pattern1=Pattern1.replaceAll("[a-z]", "a"); - Pattern1=Pattern1.replaceAll("[0-9]", "0"); - Pattern1="P1:"+Pattern1; - } - String Pattern2 = tokens[p]; - if(Pattern2.matches(".*[\\W\\-\\_].*")) - { - Pattern2="__nil__"; - } - else - { - Pattern2=Pattern2.replaceAll("[A-Za-z]", "a"); - Pattern2=Pattern2.replaceAll("[0-9]", "0"); - Pattern2="P2:"+Pattern2; - } - String Pattern3 = tokens[p]; - if(Pattern3.matches(".*[\\W\\-\\_].*")) - { - Pattern3="__nil__"; - } - else - { - Pattern3=Pattern3.replaceAll("[A-Z]+", "A"); - Pattern3=Pattern3.replaceAll("[a-z]+", "a"); - Pattern3=Pattern3.replaceAll("[0-9]+", "0"); - Pattern3="P3:"+Pattern3; - } - String Pattern4 = tokens[p]; - if(Pattern4.matches(".*[\\W\\-\\_].*")) - { - Pattern4="__nil__"; - } - else - { - Pattern4=Pattern4.replaceAll("[A-Za-z]+", "a"); - Pattern4=Pattern4.replaceAll("[0-9]+", "0"); - Pattern4="P4:"+Pattern4; - } - - //prefix - String prefix=""; - tmp=tokens[p]; - if(tmp.length()>=1){ prefix=tmp.substring(0, 1);}else{prefix="__nil__";} - if(tmp.length()>=2){ prefix=prefix+" "+tmp.substring(0, 2);}else{prefix=prefix+" __nil__";} - if(tmp.length()>=3){ prefix=prefix+" "+tmp.substring(0, 3);}else{prefix=prefix+" __nil__";} - if(tmp.length()>=4){ prefix=prefix+" "+tmp.substring(0, 4);}else{prefix=prefix+" __nil__";} - if(tmp.length()>=5){ prefix=prefix+" "+tmp.substring(0, 5);}else{prefix=prefix+" __nil__";} - - //suffix - String suffix=""; - tmp=tokens[p]; - if(tmp.length()>=1){ suffix=tmp.substring(tmp.length()-1, tmp.length());}else{suffix="__nil__";} - if(tmp.length()>=2){ suffix=suffix+" "+tmp.substring(tmp.length()-2, tmp.length());}else{suffix=suffix+" __nil__";} - if(tmp.length()>=3){ suffix=suffix+" "+tmp.substring(tmp.length()-3, tmp.length());}else{suffix=suffix+" __nil__";} - if(tmp.length()>=4){ suffix=suffix+" "+tmp.substring(tmp.length()-4, tmp.length());}else{suffix=suffix+" __nil__";} - if(tmp.length()>=5){ suffix=suffix+" "+tmp.substring(tmp.length()-5, tmp.length());}else{suffix=suffix+" __nil__";} - - //Abbreviation & Long Form - String AbbLF="__nil__"; - if(AbbLFStatus_hash.containsKey(Offset)) - { - AbbLF=AbbLFStatus_hash.get(Offset); - } - - String Status = ID.substring(p, p+1); - FileData.write(tokens[p]+" "+WSB+" "+WSF+" "+stem - +" "+Num_num+" "+Num_num+" "+Num_Uc+" "+Num_lc+" "+Num_All+" "+SpecificC - +" "+ChemPreSuf+" "+MentionType+" "+ProteinSym+" "+Repeat - +" "+Pattern1+" "+Pattern2+" "+Pattern3+" "+Pattern4 - +" "+prefix+" "+suffix+" "+AbbLF - +" "+Status+"\n"); - Offset=Offset+tokens[p].length()+1; - if(ID.length()>tokens.length) - { - System.out.println(ID+"\t"+TokenSTR); - } - } - FileData.write("\n"); - } - } - - } - } - FileData.close(); - } - catch(IOException e1){ System.out.println("[MR]: Input file is not exist.");} - } - public void FeatureExtraction_Test(String FilenameData) throws XMLStreamException - { - try - { - /** output files */ - BufferedWriter FileData = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(FilenameData), "UTF-8")); // .data - //NLP modules - SnowballStemmer stemmer = new englishStemmer(); - /** PMIDs : i */ - for (int i = 0; i < GNormPlus.BioCDocobj.Annotations.size(); i++) - { - String Pmid = GNormPlus.BioCDocobj.PMIDs.get(i); - - /** Paragraphs : j */ - for (int j = 0; j < GNormPlus.BioCDocobj.Annotations.get(i).size(); j++) - { - ArrayList Annotation = GNormPlus.BioCDocobj.Annotations.get(i).get(j); - /** Annotations : k - * 0 start - * 1 last - * 2 mention - * 3 type - * 4 id - */ - for (int k = 0; k < Annotation.size() ; k++) // k : Annotations - { - String anno[]=Annotation.get(k).split("\\t",-1); - String Mentions = anno[2]; - String Type = anno[3]; - String MentionArr[]=Mentions.split("\\|",-1); - if(Type.equals("Gene")) - { - for(int m=0;m Token2Num = new HashMap (); - for(int p=0;p AbbLFStatus_hash = new HashMap (); - for(String Pmid_LF : GNormPlus.PmidLF2Abb_hash.keySet()) - { - String pf[] = Pmid_LF.split("\\t",-1); - if(pf[0].equals(Pmid)) - { - String Abb = GNormPlus.PmidLF2Abb_hash.get(Pmid_LF); - String LF = pf[1]; - - Abb = Abb.replaceAll("([0-9])([A-Za-z])", "$1 $2"); - Abb = Abb.replaceAll("([A-Za-z])([0-9])", "$1 $2"); - Abb = Abb.replaceAll("([A-Z])([a-z])", "$1 $2"); - Abb = Abb.replaceAll("([a-z])([A-Z])", "$1 $2"); - Abb = Abb.replaceAll("([\\W])", " $1 "); - Abb = Abb.replaceAll("[ ]+", " "); - Abb = Abb.replaceAll("^[ ]+", ""); - - LF = LF.replaceAll("([0-9])([A-Za-z])", "$1 $2"); - LF = LF.replaceAll("([A-Za-z])([0-9])", "$1 $2"); - LF = LF.replaceAll("([A-Z])([a-z])", "$1 $2"); - LF = LF.replaceAll("([a-z])([A-Z])", "$1 $2"); - LF = LF.replaceAll("([\\W])", " $1 "); - LF = LF.replaceAll("[ ]+", " "); - LF = LF.replaceAll("^[ ]+", ""); - - - Abb=Abb.replaceAll("([\\~\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\_\\+\\-\\=\\[\\]\\;\\'\\,\\.\\/\\{\\}\\|\\:\\?])","\\\\$1"); - LF=LF.replaceAll("([\\~\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\_\\+\\-\\=\\[\\]\\;\\'\\,\\.\\/\\{\\}\\|\\:\\?])","\\\\$1"); - Abb=Abb.toLowerCase(); - LF=LF.toLowerCase(); - Pattern ptmp1 = Pattern.compile("(.*)" - + "("+LF+")" - + "([ ]*\\([ ]*)" - + "("+Abb+")" - + "[ ]*\\).*"); - Matcher mtmp1 = ptmp1.matcher(TokenSTR.toLowerCase()); - Pattern ptmp2 = Pattern.compile("(.*)" - + "("+Abb+")" - + "([ ]*\\([ ]*)" - + "("+LF+")" - + "[ ]*\\).*"); - Matcher mtmp2 = ptmp2.matcher(TokenSTR.toLowerCase()); - int start_LF=0; - int last_LF=0; - int start_Abb=0; - int last_Abb=0; - if(mtmp1.find()) - { - start_LF = mtmp1.group(1).length(); - last_LF = start_LF+mtmp1.group(2).length(); - start_Abb = last_LF+mtmp1.group(3).length(); - last_Abb = start_Abb+mtmp1.group(4).length(); - } - else if(mtmp2.find()) - { - start_Abb = mtmp2.group(1).length(); - last_Abb = start_LF+mtmp2.group(2).length(); - start_LF = last_LF+mtmp2.group(3).length(); - last_LF = start_Abb+mtmp2.group(4).length(); - } - for(int l=start_LF;l0) - { - String B=tokens[p-1]; - B=B.replaceAll("[A-Za-z]+", "A"); - B=B.replaceAll("[0-9]+", "0"); - WSB="WSB:"+B; - } - if(p3){Num_num="N:4+";}else{Num_num="N:"+ tmp.length();} - - //Number of Uppercase [A-Z] - String Num_Uc=""; - tmp=tokens[p]; - tmp=tmp.replaceAll("[^A-Z]",""); - if(tmp.length()>3){Num_Uc="U:4+";}else{Num_Uc="U:"+ tmp.length();} - - //Number of Lowercase [a-z] - String Num_lc=""; - tmp=tokens[p]; - tmp=tmp.replaceAll("[^a-z]",""); - if(tmp.length()>3){Num_lc="L:4+";}else{Num_lc="L:"+ tmp.length();} - - //Number of ALL char - String Num_All=""; - if(tokens[p].length()>3){Num_All="A:4+";}else{Num_All="A:"+ tokens[p].length();} - - //specific character (;:,.->+_) - String SpecificC="__nil__"; - if(tokens[p].equals(";") || tokens[p].equals(":") || tokens[p].equals(",") || tokens[p].equals(".") || tokens[p].equals("-") || tokens[p].equals(">") || tokens[p].equals("+") || tokens[p].equals("_")) - { - SpecificC="-SpecificC1-"; - } - else if(tokens[p].equals("(") || tokens[p].equals(")")) - { - SpecificC="-SpecificC2-"; - } - else if(tokens[p].equals("{") || tokens[p].equals("}")) - { - SpecificC="-SpecificC3-"; - } - else if(tokens[p].equals("[") || tokens[p].equals("]")) - { - SpecificC="-SpecificC4-"; - } - else if(tokens[p].equals("\\") || tokens[p].equals("/")) - { - SpecificC="-SpecificC5-"; - } - - //Chemical Prefix/Suffix - String ChemPreSuf="__nil__"; - if(tokens[p].matches(".*(yl|ylidyne|oyl|sulfonyl)")){ChemPreSuf="-CHEMinlineSuffix-";} - else if(tokens[p].matches("(meth|eth|prop|tetracos).*")){ChemPreSuf="-CHEMalkaneStem-";} - else if(tokens[p].matches("(di|tri|tetra).*")){ChemPreSuf="-CHEMsimpleMultiplier-";} - else if(tokens[p].matches("(benzen|pyridin|toluen).*")){ChemPreSuf="-CHEMtrivialRing-";} - else if(tokens[p].matches(".*(one|ol|carboxylic|amide|ate|acid|ium|ylium|ide|uide|iran|olan|inan|pyrid|acrid|amid|keten|formazan|fydrazin)(s|)")){ChemPreSuf="-CHEMsuffix-";} - - //MentionType - String MentionType="__nil__"; - if(GNormPlus.SimConceptMention2Type_hash.containsKey(tokens[p])) - { - MentionType = "-"+GNormPlus.SimConceptMention2Type_hash.get(tokens[p])+"-"; - } - - //Protein symbols - String ProteinSym="__nil__"; - if(tokens[p].matches(".*(glutamine|glutamic|leucine|valine|isoleucine|lysine|alanine|glycine|aspartate|methionine|threonine|histidine|aspartic|asparticacid|arginine|asparagine|tryptophan|proline|phenylalanine|cysteine|serine|glutamate|tyrosine|stop|frameshift).*")){ChemPreSuf="-ProteinSymFull-";} - else if(tokens[p].matches("(cys|ile|ser|gln|met|asn|pro|lys|asp|thr|phe|ala|gly|his|leu|arg|trp|val|glu|tyr|fs|fsx)")){ChemPreSuf="-ProteinSymTri-";} - else if(tokens[p].matches("[CISQMNPKDTFAGHLRWVEYX]")){ChemPreSuf="-ProteinSymChar-";} - - //Repeat - String Repeat="__nil__"; - if(Token2Num.get(tokens[p])>1 && tokens[p].length()>1 && (!tokens[p].matches("([\\W\\-\\_0-9]+|and|or|alpha|beta|gamma|theta|zeta|delta|kappa|II|VI|IV|III)"))) - { - Repeat="-Repeat-"; - } - - //Patterns - String Pattern1 = tokens[p]; - if(Pattern1.matches(".*[\\W\\-\\_].*")) - { - Pattern1="__nil__"; - } - else - { - Pattern1=Pattern1.replaceAll("[A-Z]", "A"); - Pattern1=Pattern1.replaceAll("[a-z]", "a"); - Pattern1=Pattern1.replaceAll("[0-9]", "0"); - Pattern1="P1:"+Pattern1; - } - String Pattern2 = tokens[p]; - if(Pattern2.matches(".*[\\W\\-\\_].*")) - { - Pattern2="__nil__"; - } - else - { - Pattern2=Pattern2.replaceAll("[A-Za-z]", "a"); - Pattern2=Pattern2.replaceAll("[0-9]", "0"); - Pattern2="P2:"+Pattern2; - } - String Pattern3 = tokens[p]; - if(Pattern3.matches(".*[\\W\\-\\_].*")) - { - Pattern3="__nil__"; - } - else - { - Pattern3=Pattern3.replaceAll("[A-Z]+", "A"); - Pattern3=Pattern3.replaceAll("[a-z]+", "a"); - Pattern3=Pattern3.replaceAll("[0-9]+", "0"); - Pattern3="P3:"+Pattern3; - } - String Pattern4 = tokens[p]; - if(Pattern4.matches(".*[\\W\\-\\_].*")) - { - Pattern4="__nil__"; - } - else - { - Pattern4=Pattern4.replaceAll("[A-Za-z]+", "a"); - Pattern4=Pattern4.replaceAll("[0-9]+", "0"); - Pattern4="P4:"+Pattern4; - } - - //prefix - String prefix=""; - tmp=tokens[p]; - if(tmp.length()>=1){ prefix=tmp.substring(0, 1);}else{prefix="__nil__";} - if(tmp.length()>=2){ prefix=prefix+" "+tmp.substring(0, 2);}else{prefix=prefix+" __nil__";} - if(tmp.length()>=3){ prefix=prefix+" "+tmp.substring(0, 3);}else{prefix=prefix+" __nil__";} - if(tmp.length()>=4){ prefix=prefix+" "+tmp.substring(0, 4);}else{prefix=prefix+" __nil__";} - if(tmp.length()>=5){ prefix=prefix+" "+tmp.substring(0, 5);}else{prefix=prefix+" __nil__";} - - //suffix - String suffix=""; - tmp=tokens[p]; - if(tmp.length()>=1){ suffix=tmp.substring(tmp.length()-1, tmp.length());}else{suffix="__nil__";} - if(tmp.length()>=2){ suffix=suffix+" "+tmp.substring(tmp.length()-2, tmp.length());}else{suffix=suffix+" __nil__";} - if(tmp.length()>=3){ suffix=suffix+" "+tmp.substring(tmp.length()-3, tmp.length());}else{suffix=suffix+" __nil__";} - if(tmp.length()>=4){ suffix=suffix+" "+tmp.substring(tmp.length()-4, tmp.length());}else{suffix=suffix+" __nil__";} - if(tmp.length()>=5){ suffix=suffix+" "+tmp.substring(tmp.length()-5, tmp.length());}else{suffix=suffix+" __nil__";} - - //Abbreviation & Long Form - String AbbLF="__nil__"; - if(AbbLFStatus_hash.containsKey(Offset)) - { - AbbLF=AbbLFStatus_hash.get(Offset); - } - - FileData.write(tokens[p]+" "+WSB+" "+WSF+" "+stem - +" "+Num_num+" "+Num_num+" "+Num_Uc+" "+Num_lc+" "+Num_All+" "+SpecificC - +" "+ChemPreSuf+" "+MentionType+" "+ProteinSym+" "+Repeat - +" "+Pattern1+" "+Pattern2+" "+Pattern3+" "+Pattern4 - +" "+prefix+" "+suffix+" "+AbbLF+"\n"); - Offset=Offset+tokens[p].length()+1; - } - FileData.write("\n"); - } - } - } - - } - } - FileData.close(); - } - catch(IOException e1){ System.out.println("[MR]: Input file is not exist.");} - } - public void CRF_test(String model, String FilenameData,String FilenameOutput) throws IOException - { - File f = new File(FilenameOutput); - BufferedWriter fr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), "UTF-8")); - - Runtime runtime = Runtime.getRuntime(); - - String cmd ="CRF/crf_test -m "+model+" -o "+FilenameOutput+" "+FilenameData; - - try { - Process process = runtime.exec(cmd); - InputStream is = process.getInputStream(); - InputStreamReader isr = new InputStreamReader(is, "UTF-8"); - BufferedReader br = new BufferedReader(isr); - String line=""; - while ( (line = br.readLine()) != null) - { - fr.write(line); - fr.newLine(); - fr.flush(); - } - is.close(); - isr.close(); - br.close(); - fr.close(); - } - catch (IOException e) { - System.out.println(e); - runtime.exit(0); - } - } - public void CRF_learn(String model,String FilenameData) throws IOException - { - Runtime runtime = Runtime.getRuntime(); - - Process process = null; - String line = null; - InputStream is = null; - InputStreamReader isr = null; - BufferedReader br = null; - String cmd = "CRF/crf_learn -f 3 -c 4.0 CRF/template_SimConcept "+FilenameData+" "+model; - - try { - process = runtime.exec(cmd); - is = process.getInputStream(); - isr = new InputStreamReader(is, "UTF-8"); - br = new BufferedReader(isr); - while ( (line = br.readLine()) != null) - { - System.out.println(line); - System.out.flush(); - } - is.close(); - isr.close(); - br.close(); - } - catch (IOException e) { - System.out.println(e); - runtime.exit(0); - } - } - public void ReadCRFresult(String Filename,String FilenameOutput,String FilenameBioC) throws XMLStreamException, IOException - { - /** load CRF output */ - ArrayList outputArr1 = new ArrayList(); - BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(FilenameOutput), "UTF-8")); - String line; - while ((line = inputfile.readLine()) != null) - { - outputArr1.add(line); - } - inputfile.close(); - - /** - * Recognize the mentions which can be simplified - */ - int Count_mention=0; - boolean Simplified=false; - String Mention=""; - String Mention_NoSpace=""; - String States=""; - HashMap Mentions_hash = new HashMap(); - HashMap States_hash = new HashMap(); - HashMap Output_Split_mention_Ind = new HashMap(); - HashMap Output_Split_mention = new HashMap(); - for(int i=0;i Split_mention = new ArrayList(); - ArrayList Split_state = new ArrayList(); - String tmp_mention=""; - String tmp_state=""; - /** - * count = Mentions_count.get(i) : # of the mention in the corpus (543) - * Mentions_hash.get(count) : Original Mention (ORP - 1 to ORP - 6) - * States_hash.get(count) : States (AASNOOS) - */ - - String TokenArr[]=Mentions_hash.get(MNoSpace).split(" ",-1); - String StateArr[]=States_hash.get(MNoSpace).split("",-1); - - //refinement : isn't used - Pattern ptmp1 = Pattern.compile("^([S]+)([CN])([S]+)$"); - Matcher mtmp1 = ptmp1.matcher(States_hash.get(MNoSpace)); - if(mtmp1.find()) - { - States_hash.put(MNoSpace, mtmp1.group(1)+"J"+mtmp1.group(3)); - } - - //Split BE - int len=TokenArr.length; - if(StateArr.length0) - { - Split_mention.add(tmp_mention); - Split_state.add(tmp_state); - } - tmp_mention = ""; - tmp_state = ""; - } - else //CNBF - { - tmp_mention = tmp_mention + TokenArr[s] + " "; - tmp_state = tmp_state + StateArr[s]; - } - } - if(!tmp_mention.equals("")) - { - Split_mention.add(tmp_mention); - Split_state.add(tmp_state); - } - - //Split B/F - for(int m=0;m strainsX = new ArrayList(); - ArrayList STAstrainsX = new ArrayList(); - String each_token[] = Split_mention.get(m).split(" "); - String each_state[] = Split_state.get(m).split(""); - for(int s=0;s strainsCN = new ArrayList(); - String CorN=""; - - String each_token[] = Split_mention.get(m).split(" ",-1); - String each_state[] = Split_state.get(m).split("",-1); - - for(int k=0;k=4) - { - A=A.replace("s $", ""); - } - A=A+"STRAINXXX"; - strainCN=strainCN+each_token[k]+" "; - CNO_continous=0; - } - else if(each_state[k].matches("[CN]") && CNO_continous==0) - { - CorN=each_state[k]; - strainsCN.add(strainCN); - strainCN=""; - CNO_continous++; - } - else if(each_state[k].equals("J")) - { - if(!strainCN.equals("")){strainsCN.add(strainCN);} - - A=A.replaceAll("STRAINXXXSTRAINXXX","STRAINXXX"); - A=A.replaceAll("STRAINXXXSTRAINXXX","STRAINXXX"); - - ptmp1 = Pattern.compile("^(.+)s (.*)$"); - mtmp1 = ptmp1.matcher(A); - if(mtmp1.find() && mtmp1.group(1).length()>=3 ) - { - A = mtmp1.group(1)+ " "+mtmp1.group(2); - } - - if(CorN.equals("C")) - { - for(int x=0;x2 && (tmp.substring(tmp.length()-2, tmp.length()-2).equals(" "))) - { - tmp = tmp.substring(0,tmp.length()-2); - } - if(Output_Split_mention_Ind.containsKey(MNoSpace)) - { - Output_Split_mention_Ind.put(MNoSpace, Output_Split_mention_Ind.get(MNoSpace)+"|"+tmp); - } - else - { - Output_Split_mention_Ind.put(MNoSpace, tmp); - } - } - } - else if(CorN.equals("N")) - { - if(strainsCN.contains(0) && strainsCN.contains(1)) - { - String strain1= strainsCN.get(0).replaceAll(" ", ""); - String strain2= strainsCN.get(1).replaceAll(" ", ""); - if(strain1.matches("[0-9]+") && strain2.matches("[0-9]+")) - { - if(Integer.parseInt(strain2)-Integer.parseInt(strain1)<=20) - { - for(int strCount=Integer.parseInt(strain1);strCount<=Integer.parseInt(strain2);strCount++) - { - String tmp=A; - tmp = tmp.replace("STRAINXXX", Integer.toString(strCount)); - tmp = tmp.replaceAll("[ ]+"," "); - if(tmp.length()>2 && tmp.substring(tmp.length()-2, tmp.length()-2).equals(" ")) - { - tmp = tmp.substring(0,tmp.length()-2); - } - if(Output_Split_mention_Ind.containsKey(MNoSpace)) - { - Output_Split_mention_Ind.put(MNoSpace, Output_Split_mention_Ind.get(MNoSpace)+"|"+tmp); - } - else - { - Output_Split_mention_Ind.put(MNoSpace, tmp); - } - } - } - } - else if(strain1.matches("[A-Z]+ ") && strain2.matches("[A-Z]+ ")) - { - int strInt1 = (int) strain1.replaceAll(" ", "").charAt(0); - int strInt2 = (int) strain2.replaceAll(" ", "").charAt(0); - if(strInt2-strInt1<=20) - { - for(int strCount=strInt1;strCount<=strInt2;strCount++) - { - String tmp=A; - tmp = tmp.replace("STRAINXXX", Integer.toString(strCount)); - tmp = tmp.replaceAll("[ ]+"," "); - if(tmp.length()>2 && tmp.substring(tmp.length()-2, tmp.length()-2).equals(" ")) - { - tmp = tmp.substring(0,tmp.length()-2); - } - if(Output_Split_mention_Ind.containsKey(MNoSpace)) - { - Output_Split_mention_Ind.put(MNoSpace, Output_Split_mention_Ind.get(MNoSpace)+"|"+tmp); - } - else - { - Output_Split_mention_Ind.put(MNoSpace, tmp); - } - } - } - } - else - { - if(Output_Split_mention.containsKey(MNoSpace)) - { - Output_Split_mention.put(MNoSpace, Output_Split_mention.get(MNoSpace)+"|"+Split_mention.get(m)); - } - else - { - Output_Split_mention.put(MNoSpace, Split_mention.get(m)); - } - } - } - } - else - { - if(Output_Split_mention.containsKey(MNoSpace)) - { - Output_Split_mention.put(MNoSpace, Output_Split_mention.get(MNoSpace)+"|"+Split_mention.get(m)); - } - else - { - Output_Split_mention.put(MNoSpace, Split_mention.get(m)); - } - } - - A=""; - strainCN=""; - CNO_continous=0; - strainsCN = new ArrayList(); - CorN=""; - } - } - if(!strainCN.equals("")){strainsCN.add(strainCN);} - - A=A.replaceAll("(STRAINXXX){2,}","STRAINXXX"); - - ptmp1 = Pattern.compile("^(.+)s (.*)$"); - mtmp1 = ptmp1.matcher(A); - if(mtmp1.find() && mtmp1.group(1).length()>=3 ) - { - A = mtmp1.group(1)+ " "+mtmp1.group(2); - } - - if(CorN.equals("C")) - { - for(int x=0;x2 && (tmp.substring(tmp.length()-2, tmp.length()-2).equals(" "))) - { - tmp = tmp.substring(0,tmp.length()-2); - } - if(Output_Split_mention_Ind.containsKey(MNoSpace)) - { - Output_Split_mention_Ind.put(MNoSpace, Output_Split_mention_Ind.get(MNoSpace)+"|"+tmp); - } - else - { - Output_Split_mention_Ind.put(MNoSpace, tmp); - } - } - } - else if(CorN.equals("N")) - { - if(strainsCN.size()==2) - { - String strain1= strainsCN.get(0).replaceAll(" ", ""); - String strain2= strainsCN.get(1).replaceAll(" ", ""); - if(strain1.matches("[0-9]{1,7}") && strain2.matches("[0-9]{1,7}")) - { - if(Integer.parseInt(strain2)-Integer.parseInt(strain1)<=20) - { - for(int strCount=Integer.parseInt(strain1);strCount<=Integer.parseInt(strain2);strCount++) - { - String tmp=A; - tmp = tmp.replace("STRAINXXX", Integer.toString(strCount)); - tmp = tmp.replaceAll("[ ]+"," "); - if(tmp.length()>2 && tmp.substring(tmp.length()-2, tmp.length()-2).equals(" ")) - { - tmp = tmp.substring(0,tmp.length()-2); - } - if(Output_Split_mention_Ind.containsKey(MNoSpace)) - { - Output_Split_mention_Ind.put(MNoSpace, Output_Split_mention_Ind.get(MNoSpace)+"|"+tmp); - } - else - { - Output_Split_mention_Ind.put(MNoSpace, tmp); - } - } - } - } - else if(strain1.matches("[A-Z]+ ") && strain2.matches("[A-Z]+ ")) - { - int strInt1 = (int) strain1.replaceAll(" ", "").charAt(0); - int strInt2 = (int) strain2.replaceAll(" ", "").charAt(0); - if(strInt2-strInt1<=20) - { - for(int strCount=strInt1;strCount<=strInt2;strCount++) - { - String tmp=A; - tmp = tmp.replace("STRAINXXX", Integer.toString(strCount)); - tmp = tmp.replaceAll("[ ]+"," "); - if(tmp.length()>2 && tmp.substring(tmp.length()-2, tmp.length()-2).equals(" ")) - { - tmp = tmp.substring(0,tmp.length()-2); - } - if(Output_Split_mention_Ind.containsKey(MNoSpace)) - { - Output_Split_mention_Ind.put(MNoSpace, Output_Split_mention_Ind.get(MNoSpace)+"|"+tmp); - } - else - { - Output_Split_mention_Ind.put(MNoSpace, tmp); - } - } - } - } - else - { - if(Output_Split_mention.containsKey(MNoSpace)) - { - Output_Split_mention.put(MNoSpace, Output_Split_mention.get(MNoSpace)+"|"+Split_mention.get(m)); - } - else - { - Output_Split_mention.put(MNoSpace, Split_mention.get(m)); - } - } - } - } - else - { - if(Output_Split_mention.containsKey(MNoSpace)) - { - Output_Split_mention.put(MNoSpace, Output_Split_mention.get(MNoSpace)+"|"+Split_mention.get(m)); - } - else - { - Output_Split_mention.put(MNoSpace, Split_mention.get(m)); - } - } - } - } - - for (int i = 0; i < GNormPlus.BioCDocobj.Annotations.size(); i++) - { - for (int j = 0; j < GNormPlus.BioCDocobj.Annotations.get(i).size(); j++) - { - int Annotation_Num = GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); - for (int k = 0; k < Annotation_Num ; k++) // k : Annotations - { - String anno[]=GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\\t"); //Mention - String MenArr[]=anno[2].split("\\|"); - for(int m=0;m Mentions = new ArrayList(); - for(int m=0;m ii - // ii --> 2 - for (int i = 0; i < GNormPlus.BioCDocobj.Annotations.size(); i++) - { - for (int j = 0; j < GNormPlus.BioCDocobj.Annotations.get(i).size(); j++) - { - int Annotation_Num = GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); - for (int k = 0; k < Annotation_Num ; k++) // k : Annotations - { - String anno[]=GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\\t"); //Mention - String MenArr[]=anno[2].split("\\|"); - HashMap Mentions = new HashMap(); - for(int m=0;m Annotation = GNormPlus.BioCDocobj.Annotations.get(i).get(j); + /** Annotations : k + * 0 start + * 1 last + * 2 mention + * 3 type + * 4 id + */ + int Inital_Annotation_size=Annotation.size(); + for (int k = 0; k < Annotation.size() ; k++) // k : Annotations + { + String anno[]=Annotation.get(k).split("\\t",-1); + int MentionStart= Integer.parseInt(anno[0]); + int MentionLast= Integer.parseInt(anno[1]); + String Mention = anno[2]; + String Type = anno[3]; + if(anno.length>4) + { + String ID = anno[4]; + + String TokenSTR = Mention; + TokenSTR = TokenSTR.replaceAll("([0-9])([A-Za-z])", "$1 $2"); + TokenSTR = TokenSTR.replaceAll("([A-Za-z])([0-9])", "$1 $2"); + TokenSTR = TokenSTR.replaceAll("([A-Z])([a-z])", "$1 $2"); + TokenSTR = TokenSTR.replaceAll("([a-z])([A-Z])", "$1 $2"); + TokenSTR = TokenSTR.replaceAll("([\\W])", " $1 "); + TokenSTR = TokenSTR.replaceAll("[ ]+", " "); + TokenSTR = TokenSTR.replaceAll("^[ ]+", ""); + TokenSTR = TokenSTR.replaceAll("[ ]+$", ""); + + /* + * Only for Gene + */ + if(ID.equals("ASJAS") && kInteger.parseInt(t2)) + { + tmp_ment=t1+" "+t2+" to "+t5; + Annotation.add(MentionStart+"\t"+MentionLast+"\t"+tmp_ment+"\t"+Type+"\tASNS"); + tmp_ment=t1+" "+t2+" to -"+t5; + Annotation.add(MentionStart+"\t"+MentionLast+"\t"+tmp_ment+"\t"+Type+"\tASNOS"); + tmp_ment=t1+" -"+t2+" to -"+t5; + Annotation.add(MentionStart+"\t"+MentionLast+"\t"+tmp_ment+"\t"+Type+"\tAASNOS"); + tmp_ment=t1+" "+t2+" to "+t1+" "+t5; + Annotation.add(MentionStart+"\t"+MentionLast+"\t"+tmp_ment+"\t"+Type+"\tASNAS"); + tmp_ment=t1+" "+t2+"-"+t5; + Annotation.add(MentionStart+"\t"+MentionLast+"\t"+tmp_ment+"\t"+Type+"\tASNS"); + tmp_ment=t1+" "+t2+", "+t5+", and "+(Integer.parseInt(t5)+2); + Annotation.add(MentionStart+"\t"+MentionLast+"\t"+tmp_ment+"\t"+Type+"\tASCSCCS"); + tmp_ment=t1+" -"+t2+", -"+t5+", and -"+(Integer.parseInt(t5)+2); + Annotation.add(MentionStart+"\t"+MentionLast+"\t"+tmp_ment+"\t"+Type+"\tAASC0SCC0S"); + } + } + } + + String Mention_tmp = Mention; + String tokens[]=TokenSTR.split(" ",-1); + + //For Repeat + HashMap Token2Num = new HashMap (); + for(int p=0;p AbbLFStatus_hash = new HashMap (); + for(String Pmid_LF : GNormPlus.PmidLF2Abb_hash.keySet()) + { + String pf[] = Pmid_LF.split("\\t",-1); + if(pf[0].equals(Pmid)) + { + String Abb = GNormPlus.PmidLF2Abb_hash.get(Pmid_LF); + String LF = pf[1]; + + Abb = Abb.replaceAll("([0-9])([A-Za-z])", "$1 $2"); + Abb = Abb.replaceAll("([A-Za-z])([0-9])", "$1 $2"); + Abb = Abb.replaceAll("([A-Z])([a-z])", "$1 $2"); + Abb = Abb.replaceAll("([a-z])([A-Z])", "$1 $2"); + Abb = Abb.replaceAll("([\\W])", " $1 "); + Abb = Abb.replaceAll("[ ]+", " "); + Abb = Abb.replaceAll("^[ ]+", ""); + + LF = LF.replaceAll("([0-9])([A-Za-z])", "$1 $2"); + LF = LF.replaceAll("([A-Za-z])([0-9])", "$1 $2"); + LF = LF.replaceAll("([A-Z])([a-z])", "$1 $2"); + LF = LF.replaceAll("([a-z])([A-Z])", "$1 $2"); + LF = LF.replaceAll("([\\W])", " $1 "); + LF = LF.replaceAll("[ ]+", " "); + LF = LF.replaceAll("^[ ]+", ""); + LF = LF.replaceAll("[ ]+$", ""); + + + Abb=Abb.replaceAll("([^A-Za-z0-9@ ])","\\\\$1"); + LF=LF.replaceAll("([^A-Za-z0-9@ ])","\\\\$1"); + Abb=Abb.toLowerCase(); + LF=LF.toLowerCase(); + Pattern ptmp1 = Pattern.compile("(.*)("+LF+")([ ]*\\([ ]*)("+Abb+")[ ]*\\).*"); + Matcher mtmp1 = ptmp1.matcher(TokenSTR.toLowerCase()); + Pattern ptmp2 = Pattern.compile("(.*)("+Abb+")([ ]*\\([ ]*)("+LF+")[ ]*\\).*"); + Matcher mtmp2 = ptmp2.matcher(TokenSTR.toLowerCase()); + int start_LF=0; + int last_LF=0; + int start_Abb=0; + int last_Abb=0; + if(mtmp1.find()) + { + start_LF = mtmp1.group(1).length(); + last_LF = start_LF+mtmp1.group(2).length(); + start_Abb = last_LF+mtmp1.group(3).length(); + last_Abb = start_Abb+mtmp1.group(4).length(); + } + else if(mtmp2.find()) + { + start_Abb = mtmp2.group(1).length(); + last_Abb = start_LF+mtmp2.group(2).length(); + start_LF = last_LF+mtmp2.group(3).length(); + last_LF = start_Abb+mtmp2.group(4).length(); + } + for(int l=start_LF;l0) + { + String B=tokens[p-1]; + B=B.replaceAll("[A-Za-z]+", "A"); + B=B.replaceAll("[0-9]+", "0"); + WSB="WSB:"+B; + } + if(p3){Num_num="N:4+";}else{Num_num="N:"+ tmp.length();} + + //Number of Uppercase [A-Z] + String Num_Uc=""; + tmp=tokens[p]; + tmp=tmp.replaceAll("[^A-Z]",""); + if(tmp.length()>3){Num_Uc="U:4+";}else{Num_Uc="U:"+ tmp.length();} + + //Number of Lowercase [a-z] + String Num_lc=""; + tmp=tokens[p]; + tmp=tmp.replaceAll("[^a-z]",""); + if(tmp.length()>3){Num_lc="L:4+";}else{Num_lc="L:"+ tmp.length();} + + //Number of ALL char + String Num_All=""; + if(tokens[p].length()>3){Num_All="A:4+";}else{Num_All="A:"+ tokens[p].length();} + + //specific character (;:,.->+_) + String SpecificC="__nil__"; + if(tokens[p].equals(";") || tokens[p].equals(":") || tokens[p].equals(",") || tokens[p].equals(".") || tokens[p].equals("-") || tokens[p].equals(">") || tokens[p].equals("+") || tokens[p].equals("_")) + { + SpecificC="-SpecificC1-"; + } + else if(tokens[p].equals("(") || tokens[p].equals(")")) + { + SpecificC="-SpecificC2-"; + } + else if(tokens[p].equals("{") || tokens[p].equals("}")) + { + SpecificC="-SpecificC3-"; + } + else if(tokens[p].equals("[") || tokens[p].equals("]")) + { + SpecificC="-SpecificC4-"; + } + else if(tokens[p].equals("\\") || tokens[p].equals("/")) + { + SpecificC="-SpecificC5-"; + } + + //Chemical Prefix/Suffix + String ChemPreSuf="__nil__"; + if(tokens[p].matches(".*(yl|ylidyne|oyl|sulfonyl)")){ChemPreSuf="-CHEMinlineSuffix-";} + else if(tokens[p].matches("(meth|eth|prop|tetracos).*")){ChemPreSuf="-CHEMalkaneStem-";} + else if(tokens[p].matches("(di|tri|tetra).*")){ChemPreSuf="-CHEMsimpleMultiplier-";} + else if(tokens[p].matches("(benzen|pyridin|toluen).*")){ChemPreSuf="-CHEMtrivialRing-";} + else if(tokens[p].matches(".*(one|ol|carboxylic|amide|ate|acid|ium|ylium|ide|uide|iran|olan|inan|pyrid|acrid|amid|keten|formazan|fydrazin)(s|)")){ChemPreSuf="-CHEMsuffix-";} + + //MentionType + String MentionType="__nil__"; + if(GNormPlus.SimConceptMention2Type_hash.containsKey(tokens[p])) + { + MentionType = "-"+GNormPlus.SimConceptMention2Type_hash.get(tokens[p])+"-"; + } + + //Protein symbols + String ProteinSym="__nil__"; + if(tokens[p].matches(".*(glutamine|glutamic|leucine|valine|isoleucine|lysine|alanine|glycine|aspartate|methionine|threonine|histidine|aspartic|asparticacid|arginine|asparagine|tryptophan|proline|phenylalanine|cysteine|serine|glutamate|tyrosine|stop|frameshift).*")){ChemPreSuf="-ProteinSymFull-";} + else if(tokens[p].matches("(cys|ile|ser|gln|met|asn|pro|lys|asp|thr|phe|ala|gly|his|leu|arg|trp|val|glu|tyr|fs|fsx)")){ChemPreSuf="-ProteinSymTri-";} + else if(tokens[p].matches("[CISQMNPKDTFAGHLRWVEYX]")){ChemPreSuf="-ProteinSymChar-";} + + //Repeat + String Repeat="__nil__"; + if(Token2Num.get(tokens[p])>1 && tokens[p].length()>1 && (!tokens[p].matches("([\\W\\-\\_0-9]+|and|or|alpha|beta|gamma|theta|zeta|delta|kappa|II|VI|IV|III)"))) + { + Repeat="-Repeat-"; + } + + //Patterns + String Pattern1 = tokens[p]; + if(Pattern1.matches(".*[\\W\\-\\_].*")) + { + Pattern1="__nil__"; + } + else + { + Pattern1=Pattern1.replaceAll("[A-Z]", "A"); + Pattern1=Pattern1.replaceAll("[a-z]", "a"); + Pattern1=Pattern1.replaceAll("[0-9]", "0"); + Pattern1="P1:"+Pattern1; + } + String Pattern2 = tokens[p]; + if(Pattern2.matches(".*[\\W\\-\\_].*")) + { + Pattern2="__nil__"; + } + else + { + Pattern2=Pattern2.replaceAll("[A-Za-z]", "a"); + Pattern2=Pattern2.replaceAll("[0-9]", "0"); + Pattern2="P2:"+Pattern2; + } + String Pattern3 = tokens[p]; + if(Pattern3.matches(".*[\\W\\-\\_].*")) + { + Pattern3="__nil__"; + } + else + { + Pattern3=Pattern3.replaceAll("[A-Z]+", "A"); + Pattern3=Pattern3.replaceAll("[a-z]+", "a"); + Pattern3=Pattern3.replaceAll("[0-9]+", "0"); + Pattern3="P3:"+Pattern3; + } + String Pattern4 = tokens[p]; + if(Pattern4.matches(".*[\\W\\-\\_].*")) + { + Pattern4="__nil__"; + } + else + { + Pattern4=Pattern4.replaceAll("[A-Za-z]+", "a"); + Pattern4=Pattern4.replaceAll("[0-9]+", "0"); + Pattern4="P4:"+Pattern4; + } + + //prefix + String prefix=""; + tmp=tokens[p]; + if(tmp.length()>=1){ prefix=tmp.substring(0, 1);}else{prefix="__nil__";} + if(tmp.length()>=2){ prefix=prefix+" "+tmp.substring(0, 2);}else{prefix=prefix+" __nil__";} + if(tmp.length()>=3){ prefix=prefix+" "+tmp.substring(0, 3);}else{prefix=prefix+" __nil__";} + if(tmp.length()>=4){ prefix=prefix+" "+tmp.substring(0, 4);}else{prefix=prefix+" __nil__";} + if(tmp.length()>=5){ prefix=prefix+" "+tmp.substring(0, 5);}else{prefix=prefix+" __nil__";} + + //suffix + String suffix=""; + tmp=tokens[p]; + if(tmp.length()>=1){ suffix=tmp.substring(tmp.length()-1, tmp.length());}else{suffix="__nil__";} + if(tmp.length()>=2){ suffix=suffix+" "+tmp.substring(tmp.length()-2, tmp.length());}else{suffix=suffix+" __nil__";} + if(tmp.length()>=3){ suffix=suffix+" "+tmp.substring(tmp.length()-3, tmp.length());}else{suffix=suffix+" __nil__";} + if(tmp.length()>=4){ suffix=suffix+" "+tmp.substring(tmp.length()-4, tmp.length());}else{suffix=suffix+" __nil__";} + if(tmp.length()>=5){ suffix=suffix+" "+tmp.substring(tmp.length()-5, tmp.length());}else{suffix=suffix+" __nil__";} + + //Abbreviation & Long Form + String AbbLF="__nil__"; + if(AbbLFStatus_hash.containsKey(Offset)) + { + AbbLF=AbbLFStatus_hash.get(Offset); + } + + String Status = ID.substring(p, p+1); + FileData.write(tokens[p]+" "+WSB+" "+WSF+" "+stem + +" "+Num_num+" "+Num_num+" "+Num_Uc+" "+Num_lc+" "+Num_All+" "+SpecificC + +" "+ChemPreSuf+" "+MentionType+" "+ProteinSym+" "+Repeat + +" "+Pattern1+" "+Pattern2+" "+Pattern3+" "+Pattern4 + +" "+prefix+" "+suffix+" "+AbbLF + +" "+Status+"\n"); + Offset=Offset+tokens[p].length()+1; + if(ID.length()>tokens.length) + { + System.out.println(ID+"\t"+TokenSTR); + } + } + FileData.write("\n"); + } + } + + } + } + FileData.close(); + } + catch(IOException e1){ System.out.println("[MR]: Input file is not exist.");} + } + public void FeatureExtraction_Test(String FilenameData) throws XMLStreamException + { + try + { + /** output files */ + BufferedWriter FileData = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(FilenameData), "UTF-8")); // .data + //NLP modules + SnowballStemmer stemmer = new englishStemmer(); + /** PMIDs : i */ + for (int i = 0; i < GNormPlus.BioCDocobj.Annotations.size(); i++) + { + String Pmid = GNormPlus.BioCDocobj.PMIDs.get(i); + + /** Paragraphs : j */ + for (int j = 0; j < GNormPlus.BioCDocobj.Annotations.get(i).size(); j++) + { + ArrayList Annotation = GNormPlus.BioCDocobj.Annotations.get(i).get(j); + /** Annotations : k + * 0 start + * 1 last + * 2 mention + * 3 type + * 4 id + */ + for (int k = 0; k < Annotation.size() ; k++) // k : Annotations + { + String anno[]=Annotation.get(k).split("\\t",-1); + String Mentions = anno[2]; + String Type = anno[3]; + String MentionArr[]=Mentions.split("\\|",-1); + if(Type.equals("Gene")) + { + for(int m=0;m Token2Num = new HashMap (); + for(int p=0;p AbbLFStatus_hash = new HashMap (); + for(String Pmid_LF : GNormPlus.PmidLF2Abb_hash.keySet()) + { + String pf[] = Pmid_LF.split("\\t",-1); + if(pf[0].equals(Pmid)) + { + String Abb = GNormPlus.PmidLF2Abb_hash.get(Pmid_LF); + String LF = pf[1]; + + Abb = Abb.replaceAll("([0-9])([A-Za-z])", "$1 $2"); + Abb = Abb.replaceAll("([A-Za-z])([0-9])", "$1 $2"); + Abb = Abb.replaceAll("([A-Z])([a-z])", "$1 $2"); + Abb = Abb.replaceAll("([a-z])([A-Z])", "$1 $2"); + Abb = Abb.replaceAll("([\\W])", " $1 "); + Abb = Abb.replaceAll("[ ]+", " "); + Abb = Abb.replaceAll("^[ ]+", ""); + + LF = LF.replaceAll("([0-9])([A-Za-z])", "$1 $2"); + LF = LF.replaceAll("([A-Za-z])([0-9])", "$1 $2"); + LF = LF.replaceAll("([A-Z])([a-z])", "$1 $2"); + LF = LF.replaceAll("([a-z])([A-Z])", "$1 $2"); + LF = LF.replaceAll("([\\W])", " $1 "); + LF = LF.replaceAll("[ ]+", " "); + LF = LF.replaceAll("^[ ]+", ""); + + + Abb=Abb.replaceAll("([\\~\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\_\\+\\-\\=\\[\\]\\;\\'\\,\\.\\/\\{\\}\\|\\:\\?])","\\\\$1"); + LF=LF.replaceAll("([\\~\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\_\\+\\-\\=\\[\\]\\;\\'\\,\\.\\/\\{\\}\\|\\:\\?])","\\\\$1"); + Abb=Abb.toLowerCase(); + LF=LF.toLowerCase(); + Pattern ptmp1 = Pattern.compile("(.*)" + + "("+LF+")" + + "([ ]*\\([ ]*)" + + "("+Abb+")" + + "[ ]*\\).*"); + Matcher mtmp1 = ptmp1.matcher(TokenSTR.toLowerCase()); + Pattern ptmp2 = Pattern.compile("(.*)" + + "("+Abb+")" + + "([ ]*\\([ ]*)" + + "("+LF+")" + + "[ ]*\\).*"); + Matcher mtmp2 = ptmp2.matcher(TokenSTR.toLowerCase()); + int start_LF=0; + int last_LF=0; + int start_Abb=0; + int last_Abb=0; + if(mtmp1.find()) + { + start_LF = mtmp1.group(1).length(); + last_LF = start_LF+mtmp1.group(2).length(); + start_Abb = last_LF+mtmp1.group(3).length(); + last_Abb = start_Abb+mtmp1.group(4).length(); + } + else if(mtmp2.find()) + { + start_Abb = mtmp2.group(1).length(); + last_Abb = start_LF+mtmp2.group(2).length(); + start_LF = last_LF+mtmp2.group(3).length(); + last_LF = start_Abb+mtmp2.group(4).length(); + } + for(int l=start_LF;l0) + { + String B=tokens[p-1]; + B=B.replaceAll("[A-Za-z]+", "A"); + B=B.replaceAll("[0-9]+", "0"); + WSB="WSB:"+B; + } + if(p3){Num_num="N:4+";}else{Num_num="N:"+ tmp.length();} + + //Number of Uppercase [A-Z] + String Num_Uc=""; + tmp=tokens[p]; + tmp=tmp.replaceAll("[^A-Z]",""); + if(tmp.length()>3){Num_Uc="U:4+";}else{Num_Uc="U:"+ tmp.length();} + + //Number of Lowercase [a-z] + String Num_lc=""; + tmp=tokens[p]; + tmp=tmp.replaceAll("[^a-z]",""); + if(tmp.length()>3){Num_lc="L:4+";}else{Num_lc="L:"+ tmp.length();} + + //Number of ALL char + String Num_All=""; + if(tokens[p].length()>3){Num_All="A:4+";}else{Num_All="A:"+ tokens[p].length();} + + //specific character (;:,.->+_) + String SpecificC="__nil__"; + if(tokens[p].equals(";") || tokens[p].equals(":") || tokens[p].equals(",") || tokens[p].equals(".") || tokens[p].equals("-") || tokens[p].equals(">") || tokens[p].equals("+") || tokens[p].equals("_")) + { + SpecificC="-SpecificC1-"; + } + else if(tokens[p].equals("(") || tokens[p].equals(")")) + { + SpecificC="-SpecificC2-"; + } + else if(tokens[p].equals("{") || tokens[p].equals("}")) + { + SpecificC="-SpecificC3-"; + } + else if(tokens[p].equals("[") || tokens[p].equals("]")) + { + SpecificC="-SpecificC4-"; + } + else if(tokens[p].equals("\\") || tokens[p].equals("/")) + { + SpecificC="-SpecificC5-"; + } + + //Chemical Prefix/Suffix + String ChemPreSuf="__nil__"; + if(tokens[p].matches(".*(yl|ylidyne|oyl|sulfonyl)")){ChemPreSuf="-CHEMinlineSuffix-";} + else if(tokens[p].matches("(meth|eth|prop|tetracos).*")){ChemPreSuf="-CHEMalkaneStem-";} + else if(tokens[p].matches("(di|tri|tetra).*")){ChemPreSuf="-CHEMsimpleMultiplier-";} + else if(tokens[p].matches("(benzen|pyridin|toluen).*")){ChemPreSuf="-CHEMtrivialRing-";} + else if(tokens[p].matches(".*(one|ol|carboxylic|amide|ate|acid|ium|ylium|ide|uide|iran|olan|inan|pyrid|acrid|amid|keten|formazan|fydrazin)(s|)")){ChemPreSuf="-CHEMsuffix-";} + + //MentionType + String MentionType="__nil__"; + if(GNormPlus.SimConceptMention2Type_hash.containsKey(tokens[p])) + { + MentionType = "-"+GNormPlus.SimConceptMention2Type_hash.get(tokens[p])+"-"; + } + + //Protein symbols + String ProteinSym="__nil__"; + if(tokens[p].matches(".*(glutamine|glutamic|leucine|valine|isoleucine|lysine|alanine|glycine|aspartate|methionine|threonine|histidine|aspartic|asparticacid|arginine|asparagine|tryptophan|proline|phenylalanine|cysteine|serine|glutamate|tyrosine|stop|frameshift).*")){ChemPreSuf="-ProteinSymFull-";} + else if(tokens[p].matches("(cys|ile|ser|gln|met|asn|pro|lys|asp|thr|phe|ala|gly|his|leu|arg|trp|val|glu|tyr|fs|fsx)")){ChemPreSuf="-ProteinSymTri-";} + else if(tokens[p].matches("[CISQMNPKDTFAGHLRWVEYX]")){ChemPreSuf="-ProteinSymChar-";} + + //Repeat + String Repeat="__nil__"; + if(Token2Num.get(tokens[p])>1 && tokens[p].length()>1 && (!tokens[p].matches("([\\W\\-\\_0-9]+|and|or|alpha|beta|gamma|theta|zeta|delta|kappa|II|VI|IV|III)"))) + { + Repeat="-Repeat-"; + } + + //Patterns + String Pattern1 = tokens[p]; + if(Pattern1.matches(".*[\\W\\-\\_].*")) + { + Pattern1="__nil__"; + } + else + { + Pattern1=Pattern1.replaceAll("[A-Z]", "A"); + Pattern1=Pattern1.replaceAll("[a-z]", "a"); + Pattern1=Pattern1.replaceAll("[0-9]", "0"); + Pattern1="P1:"+Pattern1; + } + String Pattern2 = tokens[p]; + if(Pattern2.matches(".*[\\W\\-\\_].*")) + { + Pattern2="__nil__"; + } + else + { + Pattern2=Pattern2.replaceAll("[A-Za-z]", "a"); + Pattern2=Pattern2.replaceAll("[0-9]", "0"); + Pattern2="P2:"+Pattern2; + } + String Pattern3 = tokens[p]; + if(Pattern3.matches(".*[\\W\\-\\_].*")) + { + Pattern3="__nil__"; + } + else + { + Pattern3=Pattern3.replaceAll("[A-Z]+", "A"); + Pattern3=Pattern3.replaceAll("[a-z]+", "a"); + Pattern3=Pattern3.replaceAll("[0-9]+", "0"); + Pattern3="P3:"+Pattern3; + } + String Pattern4 = tokens[p]; + if(Pattern4.matches(".*[\\W\\-\\_].*")) + { + Pattern4="__nil__"; + } + else + { + Pattern4=Pattern4.replaceAll("[A-Za-z]+", "a"); + Pattern4=Pattern4.replaceAll("[0-9]+", "0"); + Pattern4="P4:"+Pattern4; + } + + //prefix + String prefix=""; + tmp=tokens[p]; + if(tmp.length()>=1){ prefix=tmp.substring(0, 1);}else{prefix="__nil__";} + if(tmp.length()>=2){ prefix=prefix+" "+tmp.substring(0, 2);}else{prefix=prefix+" __nil__";} + if(tmp.length()>=3){ prefix=prefix+" "+tmp.substring(0, 3);}else{prefix=prefix+" __nil__";} + if(tmp.length()>=4){ prefix=prefix+" "+tmp.substring(0, 4);}else{prefix=prefix+" __nil__";} + if(tmp.length()>=5){ prefix=prefix+" "+tmp.substring(0, 5);}else{prefix=prefix+" __nil__";} + + //suffix + String suffix=""; + tmp=tokens[p]; + if(tmp.length()>=1){ suffix=tmp.substring(tmp.length()-1, tmp.length());}else{suffix="__nil__";} + if(tmp.length()>=2){ suffix=suffix+" "+tmp.substring(tmp.length()-2, tmp.length());}else{suffix=suffix+" __nil__";} + if(tmp.length()>=3){ suffix=suffix+" "+tmp.substring(tmp.length()-3, tmp.length());}else{suffix=suffix+" __nil__";} + if(tmp.length()>=4){ suffix=suffix+" "+tmp.substring(tmp.length()-4, tmp.length());}else{suffix=suffix+" __nil__";} + if(tmp.length()>=5){ suffix=suffix+" "+tmp.substring(tmp.length()-5, tmp.length());}else{suffix=suffix+" __nil__";} + + //Abbreviation & Long Form + String AbbLF="__nil__"; + if(AbbLFStatus_hash.containsKey(Offset)) + { + AbbLF=AbbLFStatus_hash.get(Offset); + } + + FileData.write(tokens[p]+" "+WSB+" "+WSF+" "+stem + +" "+Num_num+" "+Num_num+" "+Num_Uc+" "+Num_lc+" "+Num_All+" "+SpecificC + +" "+ChemPreSuf+" "+MentionType+" "+ProteinSym+" "+Repeat + +" "+Pattern1+" "+Pattern2+" "+Pattern3+" "+Pattern4 + +" "+prefix+" "+suffix+" "+AbbLF+"\n"); + Offset=Offset+tokens[p].length()+1; + } + FileData.write("\n"); + } + } + } + + } + } + FileData.close(); + } + catch(IOException e1){ System.out.println("[MR]: Input file is not exist.");} + } + public void CRF_test(String model, String FilenameData,String FilenameOutput) throws IOException + { + File f = new File(FilenameOutput); + BufferedWriter fr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), "UTF-8")); + + Runtime runtime = Runtime.getRuntime(); + + String cmd ="CRF/crf_test -m "+model+" -o "+FilenameOutput+" "+FilenameData; + + try { + Process process = runtime.exec(cmd); + InputStream is = process.getInputStream(); + InputStreamReader isr = new InputStreamReader(is, "UTF-8"); + BufferedReader br = new BufferedReader(isr); + String line=""; + while ( (line = br.readLine()) != null) + { + fr.write(line); + fr.newLine(); + fr.flush(); + } + is.close(); + isr.close(); + br.close(); + fr.close(); + } + catch (IOException e) { + System.out.println(e); + runtime.exit(0); + } + } + public void CRF_learn(String model,String FilenameData) throws IOException + { + Runtime runtime = Runtime.getRuntime(); + + Process process = null; + String line = null; + InputStream is = null; + InputStreamReader isr = null; + BufferedReader br = null; + String cmd = "CRF/crf_learn -f 3 -c 4.0 CRF/template_SimConcept "+FilenameData+" "+model; + + try { + process = runtime.exec(cmd); + is = process.getInputStream(); + isr = new InputStreamReader(is, "UTF-8"); + br = new BufferedReader(isr); + while ( (line = br.readLine()) != null) + { + System.out.println(line); + System.out.flush(); + } + is.close(); + isr.close(); + br.close(); + } + catch (IOException e) { + System.out.println(e); + runtime.exit(0); + } + } + public void ReadCRFresult(String Filename,String FilenameOutput,String FilenameBioC) throws XMLStreamException, IOException + { + /** load CRF output */ + ArrayList outputArr1 = new ArrayList(); + BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(FilenameOutput), "UTF-8")); + String line; + while ((line = inputfile.readLine()) != null) + { + outputArr1.add(line); + } + inputfile.close(); + + /** + * Recognize the mentions which can be simplified + */ + int Count_mention=0; + boolean Simplified=false; + String Mention=""; + String Mention_NoSpace=""; + String States=""; + HashMap Mentions_hash = new HashMap(); + HashMap States_hash = new HashMap(); + HashMap Output_Split_mention_Ind = new HashMap(); + HashMap Output_Split_mention = new HashMap(); + for(int i=0;i Split_mention = new ArrayList(); + ArrayList Split_state = new ArrayList(); + String tmp_mention=""; + String tmp_state=""; + /** + * count = Mentions_count.get(i) : # of the mention in the corpus (543) + * Mentions_hash.get(count) : Original Mention (ORP - 1 to ORP - 6) + * States_hash.get(count) : States (AASNOOS) + */ + + String TokenArr[]=Mentions_hash.get(MNoSpace).split(" ",-1); + String StateArr[]=States_hash.get(MNoSpace).split("",-1); + + //refinement : isn't used + Pattern ptmp1 = Pattern.compile("^([S]+)([CN])([S]+)$"); + Matcher mtmp1 = ptmp1.matcher(States_hash.get(MNoSpace)); + if(mtmp1.find()) + { + States_hash.put(MNoSpace, mtmp1.group(1)+"J"+mtmp1.group(3)); + } + + //Split BE + int len=TokenArr.length; + if(StateArr.length0) + { + Split_mention.add(tmp_mention); + Split_state.add(tmp_state); + } + tmp_mention = ""; + tmp_state = ""; + } + else //CNBF + { + tmp_mention = tmp_mention + TokenArr[s] + " "; + tmp_state = tmp_state + StateArr[s]; + } + } + if(!tmp_mention.equals("")) + { + Split_mention.add(tmp_mention); + Split_state.add(tmp_state); + } + + //Split B/F + for(int m=0;m strainsX = new ArrayList(); + ArrayList STAstrainsX = new ArrayList(); + String each_token[] = Split_mention.get(m).split(" "); + String each_state[] = Split_state.get(m).split(""); + for(int s=0;s strainsCN = new ArrayList(); + String CorN=""; + + String each_token[] = Split_mention.get(m).split(" ",-1); + String each_state[] = Split_state.get(m).split("",-1); + + for(int k=0;k=4) + { + A=A.replace("s $", ""); + } + A=A+"STRAINXXX"; + strainCN=strainCN+each_token[k]+" "; + CNO_continous=0; + } + else if(each_state[k].matches("[CN]") && CNO_continous==0) + { + CorN=each_state[k]; + strainsCN.add(strainCN); + strainCN=""; + CNO_continous++; + } + else if(each_state[k].equals("J")) + { + if(!strainCN.equals("")){strainsCN.add(strainCN);} + + A=A.replaceAll("STRAINXXXSTRAINXXX","STRAINXXX"); + A=A.replaceAll("STRAINXXXSTRAINXXX","STRAINXXX"); + + ptmp1 = Pattern.compile("^(.+)s (.*)$"); + mtmp1 = ptmp1.matcher(A); + if(mtmp1.find() && mtmp1.group(1).length()>=3 ) + { + A = mtmp1.group(1)+ " "+mtmp1.group(2); + } + + if(CorN.equals("C")) + { + for(int x=0;x2 && (tmp.substring(tmp.length()-2, tmp.length()-2).equals(" "))) + { + tmp = tmp.substring(0,tmp.length()-2); + } + if(Output_Split_mention_Ind.containsKey(MNoSpace)) + { + Output_Split_mention_Ind.put(MNoSpace, Output_Split_mention_Ind.get(MNoSpace)+"|"+tmp); + } + else + { + Output_Split_mention_Ind.put(MNoSpace, tmp); + } + } + } + else if(CorN.equals("N")) + { + if(strainsCN.contains(0) && strainsCN.contains(1)) + { + String strain1= strainsCN.get(0).replaceAll(" ", ""); + String strain2= strainsCN.get(1).replaceAll(" ", ""); + if(strain1.matches("[0-9]+") && strain2.matches("[0-9]+")) + { + if(Integer.parseInt(strain2)-Integer.parseInt(strain1)<=20) + { + for(int strCount=Integer.parseInt(strain1);strCount<=Integer.parseInt(strain2);strCount++) + { + String tmp=A; + tmp = tmp.replace("STRAINXXX", Integer.toString(strCount)); + tmp = tmp.replaceAll("[ ]+"," "); + if(tmp.length()>2 && tmp.substring(tmp.length()-2, tmp.length()-2).equals(" ")) + { + tmp = tmp.substring(0,tmp.length()-2); + } + if(Output_Split_mention_Ind.containsKey(MNoSpace)) + { + Output_Split_mention_Ind.put(MNoSpace, Output_Split_mention_Ind.get(MNoSpace)+"|"+tmp); + } + else + { + Output_Split_mention_Ind.put(MNoSpace, tmp); + } + } + } + } + else if(strain1.matches("[A-Z]+ ") && strain2.matches("[A-Z]+ ")) + { + int strInt1 = (int) strain1.replaceAll(" ", "").charAt(0); + int strInt2 = (int) strain2.replaceAll(" ", "").charAt(0); + if(strInt2-strInt1<=20) + { + for(int strCount=strInt1;strCount<=strInt2;strCount++) + { + String tmp=A; + tmp = tmp.replace("STRAINXXX", Integer.toString(strCount)); + tmp = tmp.replaceAll("[ ]+"," "); + if(tmp.length()>2 && tmp.substring(tmp.length()-2, tmp.length()-2).equals(" ")) + { + tmp = tmp.substring(0,tmp.length()-2); + } + if(Output_Split_mention_Ind.containsKey(MNoSpace)) + { + Output_Split_mention_Ind.put(MNoSpace, Output_Split_mention_Ind.get(MNoSpace)+"|"+tmp); + } + else + { + Output_Split_mention_Ind.put(MNoSpace, tmp); + } + } + } + } + else + { + if(Output_Split_mention.containsKey(MNoSpace)) + { + Output_Split_mention.put(MNoSpace, Output_Split_mention.get(MNoSpace)+"|"+Split_mention.get(m)); + } + else + { + Output_Split_mention.put(MNoSpace, Split_mention.get(m)); + } + } + } + } + else + { + if(Output_Split_mention.containsKey(MNoSpace)) + { + Output_Split_mention.put(MNoSpace, Output_Split_mention.get(MNoSpace)+"|"+Split_mention.get(m)); + } + else + { + Output_Split_mention.put(MNoSpace, Split_mention.get(m)); + } + } + + A=""; + strainCN=""; + CNO_continous=0; + strainsCN = new ArrayList(); + CorN=""; + } + } + if(!strainCN.equals("")){strainsCN.add(strainCN);} + + A=A.replaceAll("(STRAINXXX){2,}","STRAINXXX"); + + ptmp1 = Pattern.compile("^(.+)s (.*)$"); + mtmp1 = ptmp1.matcher(A); + if(mtmp1.find() && mtmp1.group(1).length()>=3 ) + { + A = mtmp1.group(1)+ " "+mtmp1.group(2); + } + + if(CorN.equals("C")) + { + for(int x=0;x2 && (tmp.substring(tmp.length()-2, tmp.length()-2).equals(" "))) + { + tmp = tmp.substring(0,tmp.length()-2); + } + if(Output_Split_mention_Ind.containsKey(MNoSpace)) + { + Output_Split_mention_Ind.put(MNoSpace, Output_Split_mention_Ind.get(MNoSpace)+"|"+tmp); + } + else + { + Output_Split_mention_Ind.put(MNoSpace, tmp); + } + } + } + else if(CorN.equals("N")) + { + if(strainsCN.size()==2) + { + String strain1= strainsCN.get(0).replaceAll(" ", ""); + String strain2= strainsCN.get(1).replaceAll(" ", ""); + if(strain1.matches("[0-9]{1,7}") && strain2.matches("[0-9]{1,7}")) + { + if(Integer.parseInt(strain2)-Integer.parseInt(strain1)<=20) + { + for(int strCount=Integer.parseInt(strain1);strCount<=Integer.parseInt(strain2);strCount++) + { + String tmp=A; + tmp = tmp.replace("STRAINXXX", Integer.toString(strCount)); + tmp = tmp.replaceAll("[ ]+"," "); + if(tmp.length()>2 && tmp.substring(tmp.length()-2, tmp.length()-2).equals(" ")) + { + tmp = tmp.substring(0,tmp.length()-2); + } + if(Output_Split_mention_Ind.containsKey(MNoSpace)) + { + Output_Split_mention_Ind.put(MNoSpace, Output_Split_mention_Ind.get(MNoSpace)+"|"+tmp); + } + else + { + Output_Split_mention_Ind.put(MNoSpace, tmp); + } + } + } + } + else if(strain1.matches("[A-Z]+ ") && strain2.matches("[A-Z]+ ")) + { + int strInt1 = (int) strain1.replaceAll(" ", "").charAt(0); + int strInt2 = (int) strain2.replaceAll(" ", "").charAt(0); + if(strInt2-strInt1<=20) + { + for(int strCount=strInt1;strCount<=strInt2;strCount++) + { + String tmp=A; + tmp = tmp.replace("STRAINXXX", Integer.toString(strCount)); + tmp = tmp.replaceAll("[ ]+"," "); + if(tmp.length()>2 && tmp.substring(tmp.length()-2, tmp.length()-2).equals(" ")) + { + tmp = tmp.substring(0,tmp.length()-2); + } + if(Output_Split_mention_Ind.containsKey(MNoSpace)) + { + Output_Split_mention_Ind.put(MNoSpace, Output_Split_mention_Ind.get(MNoSpace)+"|"+tmp); + } + else + { + Output_Split_mention_Ind.put(MNoSpace, tmp); + } + } + } + } + else + { + if(Output_Split_mention.containsKey(MNoSpace)) + { + Output_Split_mention.put(MNoSpace, Output_Split_mention.get(MNoSpace)+"|"+Split_mention.get(m)); + } + else + { + Output_Split_mention.put(MNoSpace, Split_mention.get(m)); + } + } + } + } + else + { + if(Output_Split_mention.containsKey(MNoSpace)) + { + Output_Split_mention.put(MNoSpace, Output_Split_mention.get(MNoSpace)+"|"+Split_mention.get(m)); + } + else + { + Output_Split_mention.put(MNoSpace, Split_mention.get(m)); + } + } + } + } + + for (int i = 0; i < GNormPlus.BioCDocobj.Annotations.size(); i++) + { + for (int j = 0; j < GNormPlus.BioCDocobj.Annotations.get(i).size(); j++) + { + int Annotation_Num = GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); + for (int k = 0; k < Annotation_Num ; k++) // k : Annotations + { + String anno[]=GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\\t"); //Mention + String MenArr[]=anno[2].split("\\|"); + for(int m=0;m Mentions = new ArrayList(); + for(int m=0;m ii + // ii --> 2 + for (int i = 0; i < GNormPlus.BioCDocobj.Annotations.size(); i++) + { + for (int j = 0; j < GNormPlus.BioCDocobj.Annotations.get(i).size(); j++) + { + int Annotation_Num = GNormPlus.BioCDocobj.Annotations.get(i).get(j).size(); + for (int k = 0; k < Annotation_Num ; k++) // k : Annotations + { + String anno[]=GNormPlus.BioCDocobj.Annotations.get(i).get(j).get(k).split("\\t"); //Mention + String MenArr[]=anno[2].split("\\|"); + HashMap Mentions = new HashMap(); + for(int m=0;m=5: - if lines[i] not in entity_dict.keys(): - entity_dict[lines[i]]=int(segs[1]) - else: - print('entity have in',lines[i]) - if segs[0] not in error_dict.keys(): - error_dict[segs[0]]=[lines[i]] - else: - if lines[i] not in error_dict[segs[0]]: - error_dict[segs[0]].append(lines[i]) - - entity_sort=sorted(entity_dict.items(), key=lambda kv:(kv[1]), reverse=False) - for ele in entity_sort: - fout.write(ele[0]+'\n') - fout.write('\n') - return fout - -def filter_overlap(infile): #nonest - - fin=io.StringIO(infile.getvalue()) - fout=io.StringIO() - - documents=fin.read().strip().split('\n\n') - fin.close() - total_entity=0 - over_entity=0 - nest_entity=0 - for doc in documents: - lines=doc.split('\n') - entity_list=[] - if len(lines)>2: - first_entity=lines[2].split('\t') - nest_list=[first_entity] - max_eid=int(first_entity[2]) - total_entity+=len(lines)-2 - for i in range(3,len(lines)): - segs=lines[i].split('\t') - if int(segs[1])> max_eid: - if len(nest_list)==1: - entity_list.append(nest_list[0]) - nest_list=[] - nest_list.append(segs) - if int(segs[2])>max_eid: - max_eid=int(segs[2]) - else: - # print(nest_list) - nest_entity+=len(nest_list)-1 - tem=find_max_entity(nest_list)#find max entity - # if len(tem)>1: - # print('max nest >1:',tem) - entity_list.extend(tem) - nest_list=[] - nest_list.append(segs) - if int(segs[2])>max_eid: - max_eid=int(segs[2]) - - else: - nest_list.append(segs) - if int(segs[2])>max_eid: - max_eid=int(segs[2]) - if nest_list!=[]: - if len(nest_list)==1: - entity_list.append(nest_list[0]) - - else: - tem=find_max_entity(nest_list)#find max entity - # if len(tem)>1: - # print('max nest >1:',tem) - entity_list.extend(tem) - fout.write(lines[0]+'\n'+lines[1]+'\n') - for ele in entity_list: - fout.write('\t'.join(ele)+'\n') - fout.write('\n') - # print(total_entity,over_entity, nest_entity) - return fout -def find_max_entity(nest_list): #longest entity - max_len=0 - final_tem=[] - max_index=0 - for i in range(0, len(nest_list)): - cur_len=int(nest_list[i][2])-int(nest_list[i][1]) - if cur_len>max_len: - max_len=cur_len - max_index=i - - final_tem.append(nest_list[max_index]) - return final_tem - -# change ori pubtator format to labeled text , entity begin with " ssss", end with 'eeee ' -def pubtator_to_labeltext(infile): - - fin=io.StringIO(infile.getvalue()) - all_context=fin.read().strip().split('\n\n') - fin.close() - fout=io.StringIO() - label_dic={} - - for doc in all_context: - lines=doc.split('\n') - ori_text=lines[0].split('|t|')[1]+' '+lines[1].split('|a|')[1] - pmid=lines[0].split('|t|')[0] - s_index=0 - e_index=0 - new_text='' - for i in range(2,len(lines)): - segs=lines[i].split('\t') - label_dic[segs[4].lower()]=segs[4] - if len(segs)==6: - e_index=int(segs[1]) - new_text+=ori_text[s_index:e_index]+' ssss'+segs[4].lower()+' '+ori_text[int(segs[1]):int(segs[2])]+' eeee'+segs[4].lower()+' ' - s_index=int(segs[2]) - # if ori_text[int(segs[1]):int(segs[2])]!=segs[3]: - # print('error(ori,label):',ori_text[int(segs[1]):int(segs[2])],segs[3]) - - new_text+=ori_text[s_index:] - fout.write(pmid+'\t'+' '.join(new_text.strip().split())+'\n') - return fout, label_dic - - -def pre_token(sentence): - sentence=re.sub("([\=\/\(\)\<\>\+\-\_])"," \\1 ",sentence) - sentence=re.sub("[ ]+"," ",sentence); - return sentence - -# labeltext to conll format (BIO), a token (including features) per line. sentences are split by '\n', or docs are split by '\n' -def labeltext_to_conll_fasttoken(infile,label_dic): - - fin=io.StringIO(infile.getvalue()) - all_context=fin.read().strip().split('\n') - fin.close() - fout=io.StringIO() - - # nlp = stanza.Pipeline(lang='en', processors='tokenize',package='craft') #package='craft' - nlp = stanza.Pipeline(lang='en', processors={'tokenize': 'spacy'},package='None') #package='craft' - - doc_i=0 - for doc in all_context: - doc_text=doc.split('\t')[1] - doc_text=pre_token(doc_text) - doc_stanza = nlp(doc_text) - doc_i+=1 - #print(doc_i) - inentity_flag=0 - last_label='O' - for sent in doc_stanza.sentences: - temp_sent=[] - word_num=0 - for word in sent.words: - word_num+=1 - # print(word.text) - if word.text.strip()=='': - continue - temp_sent.append(word.text) - if word.text.startswith('ssss')==True: - last_label=word.text - inentity_flag=1 - elif word.text.startswith('eeee')==True: - last_label=word.text - inentity_flag=0 - else: - if last_label=='O': - now_label='O' - elif last_label.startswith('ssss')==True: - now_label='B-'+label_dic[last_label[4:]] - - elif last_label.startswith('B-')==True: - now_label='I-'+last_label[2:] - elif last_label.startswith('I-')==True: - now_label='I-'+last_label[2:] - elif last_label.startswith('eeee')==True: - now_label='O' - - fout.write(word.text+'\t'+now_label+'\n') - last_label=now_label - if inentity_flag==1: # if entity is split by sentence, will connate the sentence - # print('sentence error!!!') - # print(word.text,word_num) - # print(temp_sent) - pass - else: - fout.write('\n') - return fout - -def pubtator_to_conll(infile): - - #1.entity sort - input_sort=pubtator_entitysort(infile) - #print(input_sort.getvalue()) - - #2. no overlap, if overlap get longest entity - input_nonest=filter_overlap(input_sort) - # print('......sort.....\n',input_sort.getvalue()) - - #3. pubtator to label text - input_labtext,label_dic=pubtator_to_labeltext(input_nonest) - # print('......label.....\n',input_labtext.getvalue()) - #print(label_dic) - - #4. label text to conll - output = labeltext_to_conll_fasttoken(input_labtext,label_dic) - # print('......output.....\n',output.getvalue()) - # fout=open(outfile,'w',encoding='utf-8') - # fout.write(input_nonest.getvalue()) - # fout.close() - return output - -if __name__=='__main__': - - - infile='../../TrainingSet/No100/NER.Train.txt' - output=pubtator_to_conll(infile) - fout=open('../../TrainingSet/No100/NER.Train.conll','w',encoding='utf-8') - fout.write(output.getvalue()) - fout.close() - output.close() - - - - - +# -*- coding: utf-8 -*- +""" +Created on Wed Sep 7 08:58:22 2022 + +@author: luol2 +""" + +# -*- coding: utf-8 -*- +""" +Created on Fri Jun 24 11:27:57 2022 + +@author: luol2 +""" + + +import stanza +import sys +import os +import io +import json +import re +#sort entity by position in text +def pubtator_entitysort(infile): + + fin=open(infile,'r',encoding='utf-8') + # fout=open(path+'LitCoin/sort/Train_sort.PubTator','w',encoding='utf-8') + fout=io.StringIO() + all_in=fin.read().strip().split('\n\n') + fin.close() + error_dict={} #use to debug error + for doc in all_in: + entity_dict={} + lines=doc.split('\n') + fout.write(lines[0]+'\n'+lines[1]+'\n') + for i in range(2,len(lines)): + segs=lines[i].split('\t') + if len(segs)>=5: + if lines[i] not in entity_dict.keys(): + entity_dict[lines[i]]=int(segs[1]) + else: + print('entity have in',lines[i]) + if segs[0] not in error_dict.keys(): + error_dict[segs[0]]=[lines[i]] + else: + if lines[i] not in error_dict[segs[0]]: + error_dict[segs[0]].append(lines[i]) + + entity_sort=sorted(entity_dict.items(), key=lambda kv:(kv[1]), reverse=False) + for ele in entity_sort: + fout.write(ele[0]+'\n') + fout.write('\n') + return fout + +def filter_overlap(infile): #nonest + + fin=io.StringIO(infile.getvalue()) + fout=io.StringIO() + + documents=fin.read().strip().split('\n\n') + fin.close() + total_entity=0 + over_entity=0 + nest_entity=0 + for doc in documents: + lines=doc.split('\n') + entity_list=[] + if len(lines)>2: + first_entity=lines[2].split('\t') + nest_list=[first_entity] + max_eid=int(first_entity[2]) + total_entity+=len(lines)-2 + for i in range(3,len(lines)): + segs=lines[i].split('\t') + if int(segs[1])> max_eid: + if len(nest_list)==1: + entity_list.append(nest_list[0]) + nest_list=[] + nest_list.append(segs) + if int(segs[2])>max_eid: + max_eid=int(segs[2]) + else: + # print(nest_list) + nest_entity+=len(nest_list)-1 + tem=find_max_entity(nest_list)#find max entity + # if len(tem)>1: + # print('max nest >1:',tem) + entity_list.extend(tem) + nest_list=[] + nest_list.append(segs) + if int(segs[2])>max_eid: + max_eid=int(segs[2]) + + else: + nest_list.append(segs) + if int(segs[2])>max_eid: + max_eid=int(segs[2]) + if nest_list!=[]: + if len(nest_list)==1: + entity_list.append(nest_list[0]) + + else: + tem=find_max_entity(nest_list)#find max entity + # if len(tem)>1: + # print('max nest >1:',tem) + entity_list.extend(tem) + fout.write(lines[0]+'\n'+lines[1]+'\n') + for ele in entity_list: + fout.write('\t'.join(ele)+'\n') + fout.write('\n') + # print(total_entity,over_entity, nest_entity) + return fout +def find_max_entity(nest_list): #longest entity + max_len=0 + final_tem=[] + max_index=0 + for i in range(0, len(nest_list)): + cur_len=int(nest_list[i][2])-int(nest_list[i][1]) + if cur_len>max_len: + max_len=cur_len + max_index=i + + final_tem.append(nest_list[max_index]) + return final_tem + +# change ori pubtator format to labeled text , entity begin with " ssss", end with 'eeee ' +def pubtator_to_labeltext(infile): + + fin=io.StringIO(infile.getvalue()) + all_context=fin.read().strip().split('\n\n') + fin.close() + fout=io.StringIO() + label_dic={} + + for doc in all_context: + lines=doc.split('\n') + ori_text=lines[0].split('|t|')[1]+' '+lines[1].split('|a|')[1] + pmid=lines[0].split('|t|')[0] + s_index=0 + e_index=0 + new_text='' + for i in range(2,len(lines)): + segs=lines[i].split('\t') + label_dic[segs[4].lower()]=segs[4] + if len(segs)==6: + e_index=int(segs[1]) + new_text+=ori_text[s_index:e_index]+' ssss'+segs[4].lower()+' '+ori_text[int(segs[1]):int(segs[2])]+' eeee'+segs[4].lower()+' ' + s_index=int(segs[2]) + # if ori_text[int(segs[1]):int(segs[2])]!=segs[3]: + # print('error(ori,label):',ori_text[int(segs[1]):int(segs[2])],segs[3]) + + new_text+=ori_text[s_index:] + fout.write(pmid+'\t'+' '.join(new_text.strip().split())+'\n') + return fout, label_dic + + +def pre_token(sentence): + sentence=re.sub("([\=\/\(\)\<\>\+\-\_])"," \\1 ",sentence) + sentence=re.sub("[ ]+"," ",sentence); + return sentence + +# labeltext to conll format (BIO), a token (including features) per line. sentences are split by '\n', or docs are split by '\n' +def labeltext_to_conll_fasttoken(infile,label_dic): + + fin=io.StringIO(infile.getvalue()) + all_context=fin.read().strip().split('\n') + fin.close() + fout=io.StringIO() + + # nlp = stanza.Pipeline(lang='en', processors='tokenize',package='craft') #package='craft' + nlp = stanza.Pipeline(lang='en', processors={'tokenize': 'spacy'},package='None') #package='craft' + + doc_i=0 + for doc in all_context: + doc_text=doc.split('\t')[1] + doc_text=pre_token(doc_text) + doc_stanza = nlp(doc_text) + doc_i+=1 + #print(doc_i) + inentity_flag=0 + last_label='O' + for sent in doc_stanza.sentences: + temp_sent=[] + word_num=0 + for word in sent.words: + word_num+=1 + # print(word.text) + if word.text.strip()=='': + continue + temp_sent.append(word.text) + if word.text.startswith('ssss')==True: + last_label=word.text + inentity_flag=1 + elif word.text.startswith('eeee')==True: + last_label=word.text + inentity_flag=0 + else: + if last_label=='O': + now_label='O' + elif last_label.startswith('ssss')==True: + now_label='B-'+label_dic[last_label[4:]] + + elif last_label.startswith('B-')==True: + now_label='I-'+last_label[2:] + elif last_label.startswith('I-')==True: + now_label='I-'+last_label[2:] + elif last_label.startswith('eeee')==True: + now_label='O' + + fout.write(word.text+'\t'+now_label+'\n') + last_label=now_label + if inentity_flag==1: # if entity is split by sentence, will connate the sentence + # print('sentence error!!!') + # print(word.text,word_num) + # print(temp_sent) + pass + else: + fout.write('\n') + return fout + +def pubtator_to_conll(infile): + + #1.entity sort + input_sort=pubtator_entitysort(infile) + #print(input_sort.getvalue()) + + #2. no overlap, if overlap get longest entity + input_nonest=filter_overlap(input_sort) + # print('......sort.....\n',input_sort.getvalue()) + + #3. pubtator to label text + input_labtext,label_dic=pubtator_to_labeltext(input_nonest) + # print('......label.....\n',input_labtext.getvalue()) + #print(label_dic) + + #4. label text to conll + output = labeltext_to_conll_fasttoken(input_labtext,label_dic) + # print('......output.....\n',output.getvalue()) + # fout=open(outfile,'w',encoding='utf-8') + # fout.write(input_nonest.getvalue()) + # fout.close() + return output + +if __name__=='__main__': + + + infile='../../TrainingSet/No100/NER.Train.txt' + output=pubtator_to_conll(infile) + fout=open('../../TrainingSet/No100/NER.Train.conll','w',encoding='utf-8') + fout.write(output.getvalue()) + fout.close() + output.close() + + + + + \ No newline at end of file diff --git a/src_python/GeneNER/Evaluation_ner.py b/src_python/GeneNER/Evaluation_ner.py index b1eb140654f7b6a0099c0c4ff8e267e491c7350d..01a27359ae99292ad1cfec86125c4d6066260979 100644 --- a/src_python/GeneNER/Evaluation_ner.py +++ b/src_python/GeneNER/Evaluation_ner.py @@ -1,243 +1,243 @@ -# -*- coding: utf-8 -*- -""" -Created on Mon Mar 1 15:33:54 2021 - -@author: luol2 -""" -# from BIO format to entity -def BIO_tag(tokens): - gold_entity={} - pre_entity={} - gold_start,gold_end=0,0 - pre_start,pre_end=0,0 - for i in range(0,len(tokens)): - segs=tokens[i].split('\t') - - # generate gold entity - if segs[1].startswith('B-')>0: - gold_start=i - gold_type=segs[1][2:] - if i+1>=len(tokens): # the last word - gold_end=i - if gold_type in gold_entity.keys(): - gold_entity[gold_type].append([gold_start,gold_end]) - else: - gold_entity[gold_type]=[[gold_start,gold_end]] - else: # non last word - next_seg=tokens[i+1].split('\t') - if next_seg[1].startswith('B-')>0 or next_seg[1]=='O': - gold_end=i - if gold_type in gold_entity.keys(): - gold_entity[gold_type].append([gold_start,gold_end]) - else: - gold_entity[gold_type]=[[gold_start,gold_end]] - elif next_seg[1].startswith('I-')>0: - pass - elif segs[1].startswith('I-')>0: - if i+1>=len(tokens): # the last word - gold_end=i - if gold_type in gold_entity.keys(): - gold_entity[gold_type].append([gold_start,gold_end]) - else: - gold_entity[gold_type]=[[gold_start,gold_end]] - else: # non last word - next_seg=tokens[i+1].split('\t') - if next_seg[1].startswith('B-')>0 or next_seg[1]=='O': - gold_end=i - if gold_type in gold_entity.keys(): - gold_entity[gold_type].append([gold_start,gold_end]) - else: - gold_entity[gold_type]=[[gold_start,gold_end]] - elif next_seg[1].startswith('I-')>0: - pass - elif segs[1]=='O': - pass - - # generate prediction entity - if segs[2].startswith('B-')>0: - pre_start=i - pre_type=segs[2][2:] - if i+1>=len(tokens): # the last word - pre_end=i - if pre_type in pre_entity.keys(): - pre_entity[pre_type].append([pre_start,pre_end]) - else: - pre_entity[pre_type]=[[pre_start,pre_end]] - else: # non last word - next_seg=tokens[i+1].split('\t') - if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': - pre_end=i - if pre_type in pre_entity.keys(): - pre_entity[pre_type].append([pre_start,pre_end]) - else: - pre_entity[pre_type]=[[pre_start,pre_end]] - elif next_seg[2].startswith('I-')>0: - pass - elif segs[2].startswith('I-')>0: - if i==0 and i+10 or next_seg[2]=='O': - pre_end=i - if pre_type in pre_entity.keys(): - pre_entity[pre_type].append([pre_start,pre_end]) - else: - pre_entity[pre_type]=[[pre_start,pre_end]] - elif next_seg[2].startswith('I-')>0: - pass - elif i==0 and i+1==len(tokens):# only one word: - pre_start=i - pre_type=segs[2][2:] - pre_end=i - if pre_type in pre_entity.keys(): - pre_entity[pre_type].append([pre_start,pre_end]) - else: - pre_entity[pre_type]=[[pre_start,pre_end]] - elif i+1>=len(tokens): # the last word - last_seg=tokens[i-1].split('\t') - if last_seg[2]=='O': - pre_start=i - pre_type=segs[2][2:] - pre_end=i - if pre_type in pre_entity.keys(): - pre_entity[pre_type].append([pre_start,pre_end]) - else: - pre_entity[pre_type]=[[pre_start,pre_end]] - elif i+1< len(tokens): # non last word - next_seg=tokens[i+1].split('\t') - last_seg=tokens[i-1].split('\t') - if last_seg[2]=='O': - pre_start=i - pre_type=segs[2][2:] - if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': - pre_end=i - if pre_type in pre_entity.keys(): - pre_entity[pre_type].append([pre_start,pre_end]) - else: - pre_entity[pre_type]=[[pre_start,pre_end]] - elif next_seg[2].startswith('I-')>0: - pass - elif segs[2]=='O': - pass - # print(tokens) - # print(gold_entity) - # print(pre_entity) - return gold_entity,pre_entity - -# input: token \t Gold \t Prediction\n, sentence is split "\n" -def NER_Evaluation(): - path='//panfs/pan1/bionlp/lulab/luoling/OpenBioIE_project/models/Kfold/BiLSTM-CRF/' - fin=open(path+'dev_pre.conll_all','r',encoding='utf-8') - all_sentence=fin.read().strip().split('\n\n') - fin.close() - Metrics={} #{'entity_type':[TP,gold_num,pre_num]} - - for sentence in all_sentence: - tokens=sentence.split('\n') - gold_entity,pre_entity=BIO_tag(tokens) - # print(tokens) - for entity_type in gold_entity.keys(): - if entity_type not in Metrics.keys(): - Metrics[entity_type]=[0,len(gold_entity[entity_type]),0] - else: - Metrics[entity_type][1]+=len(gold_entity[entity_type]) - for entity_type in pre_entity.keys(): - if entity_type not in Metrics.keys(): - Metrics[entity_type]=[0,0,len(pre_entity[entity_type])] - else: - Metrics[entity_type][2]+=len(pre_entity[entity_type]) - for mention in pre_entity[entity_type]: - if entity_type in gold_entity.keys(): - if mention in gold_entity[entity_type]: - Metrics[entity_type][0]+=1 - print(Metrics) - TP,Gold_num,Pre_num=0,0,0 - for ele in Metrics.keys(): - if Metrics[ele][2]==0: - p=0 - else: - p=Metrics[ele][0]/Metrics[ele][2] - if Metrics[ele][1]==0: - r=0 - else: - r=Metrics[ele][0]/Metrics[ele][1] - if p+r==0: - f1=0 - else: - f1=2*p*r/(p+r) - TP+=Metrics[ele][0] - Gold_num+=Metrics[ele][1] - Pre_num+=Metrics[ele][2] - print(ele+': P=%.5f, R=%.5f, F1=%.5f' % (p,r,f1)) - # break - if Pre_num==0: - P=0 - else: - P=TP/Pre_num - R=TP/Gold_num - F1=2*P*R/(P+R) - print("Overall: P=%.5f, R=%.5f, F1=%.5f"% (P,R,F1)) - -def NER_Evaluation_fn(file): - - fin=open(file,'r',encoding='utf-8') - all_sentence=fin.read().strip().split('\n\n') - fin.close() - Metrics={} #{'entity_type':[TP,gold_num,pre_num]} - breai=0 - for sentence in all_sentence: - breai+=1 - if breai>5000: - break - tokens=sentence.split('\n') - gold_entity,pre_entity=BIO_tag(tokens) - # print(tokens) - for entity_type in gold_entity.keys(): - if entity_type not in Metrics.keys(): - Metrics[entity_type]=[0,len(gold_entity[entity_type]),0] - else: - Metrics[entity_type][1]+=len(gold_entity[entity_type]) - for entity_type in pre_entity.keys(): - if entity_type not in Metrics.keys(): - Metrics[entity_type]=[0,0,len(pre_entity[entity_type])] - else: - Metrics[entity_type][2]+=len(pre_entity[entity_type]) - for mention in pre_entity[entity_type]: - if entity_type in gold_entity.keys(): - if mention in gold_entity[entity_type]: - Metrics[entity_type][0]+=1 - print(Metrics) - TP,Gold_num,Pre_num=0,0,0 - for ele in Metrics.keys(): - if Metrics[ele][2]==0: - p=0 - else: - p=Metrics[ele][0]/Metrics[ele][2] - if Metrics[ele][1]==0: - r=0 - else: - r=Metrics[ele][0]/Metrics[ele][1] - if p+r==0: - f1=0 - else: - f1=2*p*r/(p+r) - TP+=Metrics[ele][0] - Gold_num+=Metrics[ele][1] - Pre_num+=Metrics[ele][2] - print(ele+': P=%.5f, R=%.5f, F1=%.5f' % (p,r,f1)) - # break - if Pre_num==0: - P=0 - else: - P=TP/Pre_num - R=TP/Gold_num - if P+R==0: - F1=0 - else: - F1=2*P*R/(P+R) - print("Overall: P=%.5f, R=%.5f, F1=%.5f"% (P,R,F1)) - return F1 - -if __name__=='__main__': - NER_Evaluation() +# -*- coding: utf-8 -*- +""" +Created on Mon Mar 1 15:33:54 2021 + +@author: luol2 +""" +# from BIO format to entity +def BIO_tag(tokens): + gold_entity={} + pre_entity={} + gold_start,gold_end=0,0 + pre_start,pre_end=0,0 + for i in range(0,len(tokens)): + segs=tokens[i].split('\t') + + # generate gold entity + if segs[1].startswith('B-')>0: + gold_start=i + gold_type=segs[1][2:] + if i+1>=len(tokens): # the last word + gold_end=i + if gold_type in gold_entity.keys(): + gold_entity[gold_type].append([gold_start,gold_end]) + else: + gold_entity[gold_type]=[[gold_start,gold_end]] + else: # non last word + next_seg=tokens[i+1].split('\t') + if next_seg[1].startswith('B-')>0 or next_seg[1]=='O': + gold_end=i + if gold_type in gold_entity.keys(): + gold_entity[gold_type].append([gold_start,gold_end]) + else: + gold_entity[gold_type]=[[gold_start,gold_end]] + elif next_seg[1].startswith('I-')>0: + pass + elif segs[1].startswith('I-')>0: + if i+1>=len(tokens): # the last word + gold_end=i + if gold_type in gold_entity.keys(): + gold_entity[gold_type].append([gold_start,gold_end]) + else: + gold_entity[gold_type]=[[gold_start,gold_end]] + else: # non last word + next_seg=tokens[i+1].split('\t') + if next_seg[1].startswith('B-')>0 or next_seg[1]=='O': + gold_end=i + if gold_type in gold_entity.keys(): + gold_entity[gold_type].append([gold_start,gold_end]) + else: + gold_entity[gold_type]=[[gold_start,gold_end]] + elif next_seg[1].startswith('I-')>0: + pass + elif segs[1]=='O': + pass + + # generate prediction entity + if segs[2].startswith('B-')>0: + pre_start=i + pre_type=segs[2][2:] + if i+1>=len(tokens): # the last word + pre_end=i + if pre_type in pre_entity.keys(): + pre_entity[pre_type].append([pre_start,pre_end]) + else: + pre_entity[pre_type]=[[pre_start,pre_end]] + else: # non last word + next_seg=tokens[i+1].split('\t') + if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': + pre_end=i + if pre_type in pre_entity.keys(): + pre_entity[pre_type].append([pre_start,pre_end]) + else: + pre_entity[pre_type]=[[pre_start,pre_end]] + elif next_seg[2].startswith('I-')>0: + pass + elif segs[2].startswith('I-')>0: + if i==0 and i+10 or next_seg[2]=='O': + pre_end=i + if pre_type in pre_entity.keys(): + pre_entity[pre_type].append([pre_start,pre_end]) + else: + pre_entity[pre_type]=[[pre_start,pre_end]] + elif next_seg[2].startswith('I-')>0: + pass + elif i==0 and i+1==len(tokens):# only one word: + pre_start=i + pre_type=segs[2][2:] + pre_end=i + if pre_type in pre_entity.keys(): + pre_entity[pre_type].append([pre_start,pre_end]) + else: + pre_entity[pre_type]=[[pre_start,pre_end]] + elif i+1>=len(tokens): # the last word + last_seg=tokens[i-1].split('\t') + if last_seg[2]=='O': + pre_start=i + pre_type=segs[2][2:] + pre_end=i + if pre_type in pre_entity.keys(): + pre_entity[pre_type].append([pre_start,pre_end]) + else: + pre_entity[pre_type]=[[pre_start,pre_end]] + elif i+1< len(tokens): # non last word + next_seg=tokens[i+1].split('\t') + last_seg=tokens[i-1].split('\t') + if last_seg[2]=='O': + pre_start=i + pre_type=segs[2][2:] + if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': + pre_end=i + if pre_type in pre_entity.keys(): + pre_entity[pre_type].append([pre_start,pre_end]) + else: + pre_entity[pre_type]=[[pre_start,pre_end]] + elif next_seg[2].startswith('I-')>0: + pass + elif segs[2]=='O': + pass + # print(tokens) + # print(gold_entity) + # print(pre_entity) + return gold_entity,pre_entity + +# input: token \t Gold \t Prediction\n, sentence is split "\n" +def NER_Evaluation(): + path='//panfs/pan1/bionlp/lulab/luoling/OpenBioIE_project/models/Kfold/BiLSTM-CRF/' + fin=open(path+'dev_pre.conll_all','r',encoding='utf-8') + all_sentence=fin.read().strip().split('\n\n') + fin.close() + Metrics={} #{'entity_type':[TP,gold_num,pre_num]} + + for sentence in all_sentence: + tokens=sentence.split('\n') + gold_entity,pre_entity=BIO_tag(tokens) + # print(tokens) + for entity_type in gold_entity.keys(): + if entity_type not in Metrics.keys(): + Metrics[entity_type]=[0,len(gold_entity[entity_type]),0] + else: + Metrics[entity_type][1]+=len(gold_entity[entity_type]) + for entity_type in pre_entity.keys(): + if entity_type not in Metrics.keys(): + Metrics[entity_type]=[0,0,len(pre_entity[entity_type])] + else: + Metrics[entity_type][2]+=len(pre_entity[entity_type]) + for mention in pre_entity[entity_type]: + if entity_type in gold_entity.keys(): + if mention in gold_entity[entity_type]: + Metrics[entity_type][0]+=1 + print(Metrics) + TP,Gold_num,Pre_num=0,0,0 + for ele in Metrics.keys(): + if Metrics[ele][2]==0: + p=0 + else: + p=Metrics[ele][0]/Metrics[ele][2] + if Metrics[ele][1]==0: + r=0 + else: + r=Metrics[ele][0]/Metrics[ele][1] + if p+r==0: + f1=0 + else: + f1=2*p*r/(p+r) + TP+=Metrics[ele][0] + Gold_num+=Metrics[ele][1] + Pre_num+=Metrics[ele][2] + print(ele+': P=%.5f, R=%.5f, F1=%.5f' % (p,r,f1)) + # break + if Pre_num==0: + P=0 + else: + P=TP/Pre_num + R=TP/Gold_num + F1=2*P*R/(P+R) + print("Overall: P=%.5f, R=%.5f, F1=%.5f"% (P,R,F1)) + +def NER_Evaluation_fn(file): + + fin=open(file,'r',encoding='utf-8') + all_sentence=fin.read().strip().split('\n\n') + fin.close() + Metrics={} #{'entity_type':[TP,gold_num,pre_num]} + breai=0 + for sentence in all_sentence: + breai+=1 + if breai>5000: + break + tokens=sentence.split('\n') + gold_entity,pre_entity=BIO_tag(tokens) + # print(tokens) + for entity_type in gold_entity.keys(): + if entity_type not in Metrics.keys(): + Metrics[entity_type]=[0,len(gold_entity[entity_type]),0] + else: + Metrics[entity_type][1]+=len(gold_entity[entity_type]) + for entity_type in pre_entity.keys(): + if entity_type not in Metrics.keys(): + Metrics[entity_type]=[0,0,len(pre_entity[entity_type])] + else: + Metrics[entity_type][2]+=len(pre_entity[entity_type]) + for mention in pre_entity[entity_type]: + if entity_type in gold_entity.keys(): + if mention in gold_entity[entity_type]: + Metrics[entity_type][0]+=1 + print(Metrics) + TP,Gold_num,Pre_num=0,0,0 + for ele in Metrics.keys(): + if Metrics[ele][2]==0: + p=0 + else: + p=Metrics[ele][0]/Metrics[ele][2] + if Metrics[ele][1]==0: + r=0 + else: + r=Metrics[ele][0]/Metrics[ele][1] + if p+r==0: + f1=0 + else: + f1=2*p*r/(p+r) + TP+=Metrics[ele][0] + Gold_num+=Metrics[ele][1] + Pre_num+=Metrics[ele][2] + print(ele+': P=%.5f, R=%.5f, F1=%.5f' % (p,r,f1)) + # break + if Pre_num==0: + P=0 + else: + P=TP/Pre_num + R=TP/Gold_num + if P+R==0: + F1=0 + else: + F1=2*P*R/(P+R) + print("Overall: P=%.5f, R=%.5f, F1=%.5f"% (P,R,F1)) + return F1 + +if __name__=='__main__': + NER_Evaluation() diff --git a/src_python/GeneNER/model_ner.py b/src_python/GeneNER/model_ner.py index 7e86cea3984d846a7ab509970b34345645ec3e46..67968c616870d79a51c1ab7d785846413629ddb2 100644 --- a/src_python/GeneNER/model_ner.py +++ b/src_python/GeneNER/model_ner.py @@ -1,102 +1,102 @@ -# -*- coding: utf-8 -*- -""" -Created on Wed Feb 10 09:08:09 2021 - -@author: luol2 -""" -import tensorflow as tf -from src_python.GeneNER.represent_ner import Hugface_RepresentationLayer -from tensorflow.keras.layers import * -from tensorflow.keras.models import Model -from tensorflow.keras.optimizers import RMSprop, SGD, Adam, Adadelta, Adagrad,Nadam -from transformers import TFBertModel, BertConfig,TFElectraModel,TFAutoModel -import numpy as np -import sys - - -class LRSchedule_LINEAR(tf.keras.optimizers.schedules.LearningRateSchedule): - def __init__( - self, - init_lr=5e-5, - init_warmup_lr=0.0, - final_lr=5e-7, - warmup_steps=0, - decay_steps=0, - ): - super().__init__() - self.init_lr = init_lr - self.init_warmup_lr=init_warmup_lr - self.final_lr = final_lr - self.warmup_steps = warmup_steps - self.decay_steps = decay_steps - - def __call__(self, step): - """ linear warm up - linear decay """ - if self.warmup_steps>0: - warmup_lr = (self.init_lr - self.init_warmup_lr)/self.warmup_steps * step+self.init_warmup_lr - else: - warmup_lr=1000.0 - #print('\n.......warmup_lr:',warmup_lr) - decay_lr = tf.math.maximum( - self.final_lr, - self.init_lr - (step - self.warmup_steps)/self.decay_steps*(self.init_lr - self.final_lr) - ) - #print('\n.....decay_lr:',decay_lr) - return tf.math.minimum(warmup_lr,decay_lr) - - -class HUGFACE_NER(): #huggingface transformers - def __init__(self, model_files): - self.model_type='HUGFACE' - self.maxlen = 256 #sent 256 doc-512,pretrain-sent 128 - self.checkpoint_path = model_files['checkpoint_path'] - self.label_file=model_files['labelfile'] - self.lowercase=model_files['lowercase'] - self.rep = Hugface_RepresentationLayer(self.checkpoint_path, self.label_file, lowercase=self.lowercase) - - - def build_encoder(self): - print('...vocab len:',self.rep.vocab_len) - plm_model = TFAutoModel.from_pretrained(self.checkpoint_path, from_pt=True) - # plm_model.resize_token_embeddings(self.rep.vocab_len) - x1_in = Input(shape=(self.maxlen,),dtype=tf.int32, name='input_ids') - x2_in = Input(shape=(self.maxlen,),dtype=tf.int32, name='token_type_ids') - x3_in = Input(shape=(self.maxlen,),dtype=tf.int32, name='attention_mask') - x = plm_model(x1_in, token_type_ids=x2_in, attention_mask=x3_in)[0] - #dense = TimeDistributed(Dense(512, activation='relu'), name='dense1')(x) - self.encoder = Model (inputs=[x1_in,x2_in,x3_in], outputs=x,name='hugface_encoder') - self.encoder.summary() - - def build_softmax_decoder(self): - - x1_in = Input(shape=(self.maxlen,),dtype=tf.int32) - x2_in = Input(shape=(self.maxlen,),dtype=tf.int32) - x3_in = Input(shape=(self.maxlen,),dtype=tf.int32) - features = self.encoder([x1_in,x2_in,x3_in]) - #features = Dropout(0.4)(features) - features = TimeDistributed(Dense(128, activation='relu'), name='dense2')(features) - features= Dropout(0.1)(features) - output = TimeDistributed(Dense(self.rep.label_table_size, activation='softmax'), name='softmax')(features) - self.model = Model(inputs=[x1_in,x2_in,x3_in], outputs=output, name="hugface_softmax") - - lr_schedule=LRSchedule_LINEAR( - init_lr=1e-5, - init_warmup_lr=1e-7, - final_lr=5e-6, - warmup_steps=0, - decay_steps=1000) - - opt = Adam(learning_rate = lr_schedule) - #opt = Adam(lr=5e-6) - self.model.compile( - optimizer=opt, - loss='sparse_categorical_crossentropy', - metrics=['accuracy'], - ) - self.model.summary() - - - def load_model(self,model_file): - self.model.load_weights(model_file) - self.model.summary() - print('load HUGFACE model done!') +# -*- coding: utf-8 -*- +""" +Created on Wed Feb 10 09:08:09 2021 + +@author: luol2 +""" +import tensorflow as tf +from src_python.GeneNER.represent_ner import Hugface_RepresentationLayer +from tensorflow.keras.layers import * +from tensorflow.keras.models import Model +from tensorflow.keras.optimizers import RMSprop, SGD, Adam, Adadelta, Adagrad,Nadam +from transformers import TFBertModel, BertConfig,TFElectraModel,TFAutoModel +import numpy as np +import sys + + +class LRSchedule_LINEAR(tf.keras.optimizers.schedules.LearningRateSchedule): + def __init__( + self, + init_lr=5e-5, + init_warmup_lr=0.0, + final_lr=5e-7, + warmup_steps=0, + decay_steps=0, + ): + super().__init__() + self.init_lr = init_lr + self.init_warmup_lr=init_warmup_lr + self.final_lr = final_lr + self.warmup_steps = warmup_steps + self.decay_steps = decay_steps + + def __call__(self, step): + """ linear warm up - linear decay """ + if self.warmup_steps>0: + warmup_lr = (self.init_lr - self.init_warmup_lr)/self.warmup_steps * step+self.init_warmup_lr + else: + warmup_lr=1000.0 + #print('\n.......warmup_lr:',warmup_lr) + decay_lr = tf.math.maximum( + self.final_lr, + self.init_lr - (step - self.warmup_steps)/self.decay_steps*(self.init_lr - self.final_lr) + ) + #print('\n.....decay_lr:',decay_lr) + return tf.math.minimum(warmup_lr,decay_lr) + + +class HUGFACE_NER(): #huggingface transformers + def __init__(self, model_files): + self.model_type='HUGFACE' + self.maxlen = 256 #sent 256 doc-512,pretrain-sent 128 + self.checkpoint_path = model_files['checkpoint_path'] + self.label_file=model_files['labelfile'] + self.lowercase=model_files['lowercase'] + self.rep = Hugface_RepresentationLayer(self.checkpoint_path, self.label_file, lowercase=self.lowercase) + + + def build_encoder(self): + print('...vocab len:',self.rep.vocab_len) + plm_model = TFAutoModel.from_pretrained(self.checkpoint_path, from_pt=True) + # plm_model.resize_token_embeddings(self.rep.vocab_len) + x1_in = Input(shape=(self.maxlen,),dtype=tf.int32, name='input_ids') + x2_in = Input(shape=(self.maxlen,),dtype=tf.int32, name='token_type_ids') + x3_in = Input(shape=(self.maxlen,),dtype=tf.int32, name='attention_mask') + x = plm_model(x1_in, token_type_ids=x2_in, attention_mask=x3_in)[0] + #dense = TimeDistributed(Dense(512, activation='relu'), name='dense1')(x) + self.encoder = Model (inputs=[x1_in,x2_in,x3_in], outputs=x,name='hugface_encoder') + self.encoder.summary() + + def build_softmax_decoder(self): + + x1_in = Input(shape=(self.maxlen,),dtype=tf.int32) + x2_in = Input(shape=(self.maxlen,),dtype=tf.int32) + x3_in = Input(shape=(self.maxlen,),dtype=tf.int32) + features = self.encoder([x1_in,x2_in,x3_in]) + #features = Dropout(0.4)(features) + features = TimeDistributed(Dense(128, activation='relu'), name='dense2')(features) + features= Dropout(0.1)(features) + output = TimeDistributed(Dense(self.rep.label_table_size, activation='softmax'), name='softmax')(features) + self.model = Model(inputs=[x1_in,x2_in,x3_in], outputs=output, name="hugface_softmax") + + lr_schedule=LRSchedule_LINEAR( + init_lr=1e-5, + init_warmup_lr=1e-7, + final_lr=5e-6, + warmup_steps=0, + decay_steps=1000) + + opt = Adam(learning_rate = lr_schedule) + #opt = Adam(lr=5e-6) + self.model.compile( + optimizer=opt, + loss='sparse_categorical_crossentropy', + metrics=['accuracy'], + ) + self.model.summary() + + + def load_model(self,model_file): + self.model.load_weights(model_file) + self.model.summary() + print('load HUGFACE model done!') diff --git a/src_python/GeneNER/ner_tag.py b/src_python/GeneNER/ner_tag.py index 58b81afeae66bb6b4bb08929af4a8ae2f44153c5..69c4944e45508ee18bf8236cedcc8512f931039d 100644 --- a/src_python/GeneNER/ner_tag.py +++ b/src_python/GeneNER/ner_tag.py @@ -1,85 +1,85 @@ -# -*- coding: utf-8 -*- -""" -Created on Wed Jun 8 11:01:23 2022 - -@author: luol2 -""" - - - -import io -import re -from src_python.GeneNER.processing_data_ner import ml_intext_fn,out_BIO_BERT_softmax_fn -from src_python.GeneNER.restore_index_ner import NN_restore_index_fn -import tensorflow as tf -gpu = tf.config.list_physical_devices('GPU') -print("Num GPUs Available: ", len(gpu)) -if len(gpu) > 0: - tf.config.experimental.set_memory_growth(gpu[0], True) - -def pre_token(sentence): - sentence=re.sub("([\W\-\_])"," \\1 ",sentence) - sentence=re.sub("[ ]+"," ",sentence); - return sentence - -def ssplit_token_pos_lemma(in_text,text_level,nlp_token, max_len=400): - #print('max_len:',max_len) - fout=io.StringIO() - - in_text=in_text.strip() - in_text=pre_token(in_text) - doc_stanza = nlp_token(in_text) - strlen=0 - for sent in doc_stanza.sentences: - for word in sent.words: - strlen+=1 - if word.text.strip()=='': - pass - #print('!!!!blank token text!!!') - else: - fout.write(word.text+'\tO\n') - if strlen>=max_len: - #print('long sentence:',strlen) - fout.write('\n') - strlen=0 - if text_level=='SENT': - fout.write('\n') - strlen=0 - if text_level=='DOC': - fout.write('\n') - - return fout.getvalue() - -def ml_tagging(ml_input,nn_model): - - test_list = ml_intext_fn(ml_input) - test_x,test_y, test_bert_text_label=nn_model.rep.load_data_hugface(test_list,word_max_len=nn_model.maxlen,label_type='softmax') - test_pre = nn_model.model.predict(test_x,batch_size=64) - test_decode_temp=out_BIO_BERT_softmax_fn(test_pre,test_bert_text_label,nn_model.rep.index_2_label) - - return test_decode_temp -# only machine learning-based method -def ML_Tag(text,ml_model,nlp_token,text_level='SENT'): - -# startTime=time.time() - ssplit_token=ssplit_token_pos_lemma(text, text_level, nlp_token, max_len=ml_model.maxlen) - #print(ssplit_token) -# print('ssplit token:',time.time()-startTime) - -# startTime=time.time() - ml_tsv=ml_tagging(ssplit_token,ml_model) - #print(ml_tsv) -# print('ml ner:',time.time()-startTime) - - final_result= NN_restore_index_fn(text,ml_tsv) - - # print('final ner:',time.time()-startTime) - - return final_result - - - - - - - +# -*- coding: utf-8 -*- +""" +Created on Wed Jun 8 11:01:23 2022 + +@author: luol2 +""" + + + +import io +import re +from src_python.GeneNER.processing_data_ner import ml_intext_fn,out_BIO_BERT_softmax_fn +from src_python.GeneNER.restore_index_ner import NN_restore_index_fn +import tensorflow as tf +gpu = tf.config.list_physical_devices('GPU') +print("Num GPUs Available: ", len(gpu)) +if len(gpu) > 0: + tf.config.experimental.set_memory_growth(gpu[0], True) + +def pre_token(sentence): + sentence=re.sub("([\W\-\_])"," \\1 ",sentence) + sentence=re.sub("[ ]+"," ",sentence); + return sentence + +def ssplit_token_pos_lemma(in_text,text_level,nlp_token, max_len=400): + #print('max_len:',max_len) + fout=io.StringIO() + + in_text=in_text.strip() + in_text=pre_token(in_text) + doc_stanza = nlp_token(in_text) + strlen=0 + for sent in doc_stanza.sentences: + for word in sent.words: + strlen+=1 + if word.text.strip()=='': + pass + #print('!!!!blank token text!!!') + else: + fout.write(word.text+'\tO\n') + if strlen>=max_len: + #print('long sentence:',strlen) + fout.write('\n') + strlen=0 + if text_level=='SENT': + fout.write('\n') + strlen=0 + if text_level=='DOC': + fout.write('\n') + + return fout.getvalue() + +def ml_tagging(ml_input,nn_model): + + test_list = ml_intext_fn(ml_input) + test_x,test_y, test_bert_text_label=nn_model.rep.load_data_hugface(test_list,word_max_len=nn_model.maxlen,label_type='softmax') + test_pre = nn_model.model.predict(test_x,batch_size=64) + test_decode_temp=out_BIO_BERT_softmax_fn(test_pre,test_bert_text_label,nn_model.rep.index_2_label) + + return test_decode_temp +# only machine learning-based method +def ML_Tag(text,ml_model,nlp_token,text_level='SENT'): + +# startTime=time.time() + ssplit_token=ssplit_token_pos_lemma(text, text_level, nlp_token, max_len=ml_model.maxlen) + #print(ssplit_token) +# print('ssplit token:',time.time()-startTime) + +# startTime=time.time() + ml_tsv=ml_tagging(ssplit_token,ml_model) + #print(ml_tsv) +# print('ml ner:',time.time()-startTime) + + final_result= NN_restore_index_fn(text,ml_tsv) + + # print('final ner:',time.time()-startTime) + + return final_result + + + + + + + diff --git a/src_python/GeneNER/processing_data_ner.py b/src_python/GeneNER/processing_data_ner.py index bebab50c6f966172c4d11400102b4ed11fdea5c1..4d3157fad86dd191ec469176e0a2e1ec12cc391e 100644 --- a/src_python/GeneNER/processing_data_ner.py +++ b/src_python/GeneNER/processing_data_ner.py @@ -1,210 +1,210 @@ -# -*- coding: utf-8 -*- -""" -Created on Tue Mar 10 16:34:12 2020 - -@author: luol2 -""" -import numpy as np -import io -import sys -#read ner text (word\tlabel), generate the list[[[w1,label],[w2,label]]] -def ml_intext(file): - fin=open(file,'r',encoding='utf-8') - alltexts=fin.read().strip().split('\n\n') - fin.close() - data_list=[] - - for sents in alltexts: - lines=sents.split('\n') - temp_sentece=[] - for i in range(0,len(lines)): - seg=lines[i].split('\t') - temp_sentece.append(seg[:]) - - data_list.append(temp_sentece) - #print(data_list) - #print(label_list) - return data_list - -def ml_intext_fn(ml_input): - fin=io.StringIO(ml_input) - alltexts=fin.read().strip().split('\n\n') - fin.close() - data_list=[] - - for sents in alltexts: - lines=sents.split('\n') - temp_sentece=[] - for i in range(0,len(lines)): - seg=lines[i].split('\t') - temp_sentece.append(seg[:]) - - data_list.append(temp_sentece) - #print(data_list) - #print(label_list) - return data_list - -# model predict result to conll evalute format [token answer predict] -def out_BIO_crf(file,raw_pre,raw_input,label_set): - fout=open(file,'w',encoding='utf-8') - for i in range(len(raw_input)): - - for j in range(len(raw_input[i])): - if jmax_len: - max_len=word_len - print(seg[0]) - for i in range(word_len): - if seg[0][i] not in char_vocab: - char_vocab.append(seg[0][i]) - #else: - # fout.write(line) - fin.close() - #fout.close() - for ele in char_vocab: - fout_char.write(ele+'\n') - fout_char.close() - print('max_len:',max_len) - - -if __name__=='__main__': - # infile='//panfs/pan1/bionlp/lulab/luoling/HPO_project/AutoPhe/data/pubmed_unlabel/mutation_disease_1990.ner_BIO' - # #outfile='//panfs/pan1/bionlp/lulab/luoling/HPO_project/AutoPhe/data/pubmed_unlabel/mutation_disease_1990.ner_BIO_new' - # outfile_char='//panfs/pan1/bionlp/lulab/luoling/HPO_project/AutoPhe/src/nn_model/vocab/char_vocab' - # #processing_text(file) - # char_vocab(infile,outfile_char) - a=[1,2,3] - print(a[:-1]) +# -*- coding: utf-8 -*- +""" +Created on Tue Mar 10 16:34:12 2020 + +@author: luol2 +""" +import numpy as np +import io +import sys +#read ner text (word\tlabel), generate the list[[[w1,label],[w2,label]]] +def ml_intext(file): + fin=open(file,'r',encoding='utf-8') + alltexts=fin.read().strip().split('\n\n') + fin.close() + data_list=[] + + for sents in alltexts: + lines=sents.split('\n') + temp_sentece=[] + for i in range(0,len(lines)): + seg=lines[i].split('\t') + temp_sentece.append(seg[:]) + + data_list.append(temp_sentece) + #print(data_list) + #print(label_list) + return data_list + +def ml_intext_fn(ml_input): + fin=io.StringIO(ml_input) + alltexts=fin.read().strip().split('\n\n') + fin.close() + data_list=[] + + for sents in alltexts: + lines=sents.split('\n') + temp_sentece=[] + for i in range(0,len(lines)): + seg=lines[i].split('\t') + temp_sentece.append(seg[:]) + + data_list.append(temp_sentece) + #print(data_list) + #print(label_list) + return data_list + +# model predict result to conll evalute format [token answer predict] +def out_BIO_crf(file,raw_pre,raw_input,label_set): + fout=open(file,'w',encoding='utf-8') + for i in range(len(raw_input)): + + for j in range(len(raw_input[i])): + if jmax_len: + max_len=word_len + print(seg[0]) + for i in range(word_len): + if seg[0][i] not in char_vocab: + char_vocab.append(seg[0][i]) + #else: + # fout.write(line) + fin.close() + #fout.close() + for ele in char_vocab: + fout_char.write(ele+'\n') + fout_char.close() + print('max_len:',max_len) + + +if __name__=='__main__': + # infile='//panfs/pan1/bionlp/lulab/luoling/HPO_project/AutoPhe/data/pubmed_unlabel/mutation_disease_1990.ner_BIO' + # #outfile='//panfs/pan1/bionlp/lulab/luoling/HPO_project/AutoPhe/data/pubmed_unlabel/mutation_disease_1990.ner_BIO_new' + # outfile_char='//panfs/pan1/bionlp/lulab/luoling/HPO_project/AutoPhe/src/nn_model/vocab/char_vocab' + # #processing_text(file) + # char_vocab(infile,outfile_char) + a=[1,2,3] + print(a[:-1]) diff --git a/src_python/GeneNER/represent_ner.py b/src_python/GeneNER/represent_ner.py index 15160314c8c5f88062ce86cbca98075a454724c0..dc0f5fc7b005a235d1d207876a636d11a7b16c1b 100644 --- a/src_python/GeneNER/represent_ner.py +++ b/src_python/GeneNER/represent_ner.py @@ -1,183 +1,183 @@ -# -*- coding: utf-8 -*- -""" -Created on Mon Aug 30 19:54:17 2021 - -@author: luol2 -""" - - - -import os, sys -import numpy as np -from tensorflow.keras.preprocessing.sequence import pad_sequences -from transformers import AutoTokenizer - - -class Hugface_RepresentationLayer(object): - - - def __init__(self, tokenizer_name_or_path, label_file,lowercase=True): - - - #load vocab - - self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, use_fast=True,do_lower_case=lowercase) - self.label_2_index={} - self.index_2_label={} - self.label_table_size=0 - self.load_label_vocab(label_file,self.label_2_index,self.index_2_label) - self.label_table_size=len(self.label_2_index) - self.vocab_len=len(self.tokenizer) - - def load_label_vocab(self,fea_file,fea_index,index_2_label): - - fin=open(fea_file,'r',encoding='utf-8') - all_text=fin.read().strip().split('\n') - fin.close() - for i in range(0,len(all_text)): - fea_index[all_text[i]]=i - index_2_label[str(i)]=all_text[i] - - - - def generate_label_list(self,ori_tokens,labels,word_index): #the lable of subtoken is the same with the label of first subtoken - label_list=['O']*len(word_index) - - label_list_index=[] - old_new_token_map=[] - ori_i=0 - for i in range(0,len(word_index)): - if word_index[i]==None: - label_list_index.append(self.label_2_index[label_list[i]]) - else: - label_list[i]=labels[word_index[i]] - label_list_index.append(self.label_2_index[label_list[i]]) - if word_index[i]==ori_i: - old_new_token_map.append(i) - ori_i+=1 - - - bert_text_label=[] - for i in range(0,len(ori_tokens)): - bert_text_label.append([ori_tokens[i],labels[i],old_new_token_map[i]]) - - return label_list_index,bert_text_label - - def generate_label_list_B(self,ori_tokens,labels,word_index): #tonly first subtoken is B, other is I - label_list=['O']*len(word_index) - - label_list_index=[] - old_new_token_map=[] - ori_i=0 - first_index=-1 - i=0 - while i max len - break - return label_list_index,bert_text_label - - def load_data_hugface(self,instances, word_max_len=100, label_type='softmax'): - - x_index=[] - x_seg=[] - x_mask=[] - y_list=[] - bert_text_labels=[] - max_len=0 - over_num=0 - maxT=word_max_len - ave_len=0 - - #print('instances:', instances) - #print('labels:',labels) - - - for sentence in instances: - sentence_text_list=[] - label_list=[] - for j in range(0,len(sentence)): - sentence_text_list.append(sentence[j][0]) - label_list.append(sentence[j][-1]) - - token_result=self.tokenizer( - sentence_text_list, - max_length=word_max_len, - truncation=True,is_split_into_words=True) - - bert_tokens=self.tokenizer.convert_ids_to_tokens(token_result['input_ids']) - word_index=token_result.word_ids(batch_index=0) - ave_len+=len(bert_tokens) - if len(sentence_text_list)>max_len: - max_len=len(sentence_text_list) - if len(bert_tokens)==maxT: - over_num+=1 - - x_index.append(token_result['input_ids']) - x_seg.append(token_result['token_type_ids']) - x_mask.append(token_result['attention_mask']) - - #print('\nsentence_text_list:',len(sentence_text_list),sentence_text_list) - #print('\nlabel:',len(label_list),label_list) - #print('\nword_index:',len(word_index),word_index) - #print('\nbert_tokens:',len(bert_tokens),bert_tokens) - label_list,bert_text_label=self.generate_label_list_B(sentence_text_list,label_list,word_index) # the label list after bert token, ori token/lable/new index - #print('\nlabel list:',len(label_list),label_list) - #print('\nbert_text_label:',len(bert_text_label),bert_text_label) - #sys.exit() - y_list.append(label_list) - #print(y_list) - bert_text_labels.append(bert_text_label) - - - x1_np = pad_sequences(x_index, word_max_len, value=0, padding='post',truncating='post') # right padding - x2_np = pad_sequences(x_seg, word_max_len, value=0, padding='post',truncating='post') - x3_np = pad_sequences(x_mask, word_max_len, value=0, padding='post',truncating='post') - y_np = pad_sequences(y_list, word_max_len, value=0, padding='post',truncating='post') - #print('x1_np:',x1_np) - #print('\nx2_np:',x2_np) - #print('\ny_np:',y_np) - #print('\nbert_text:',bert_text_labels) - # print('bert max len:',max_len,',Over',maxT,':',over_num,'ave len:',ave_len/len(instances),'total:',len(instances)) - - if label_type=='softmax': - y_np = np.expand_dims(y_np, 2) - elif label_type=='crf': - pass - - return [x1_np, x2_np,x3_np], y_np,bert_text_labels - - -if __name__ == '__main__': - pass - - - +# -*- coding: utf-8 -*- +""" +Created on Mon Aug 30 19:54:17 2021 + +@author: luol2 +""" + + + +import os, sys +import numpy as np +from tensorflow.keras.preprocessing.sequence import pad_sequences +from transformers import AutoTokenizer + + +class Hugface_RepresentationLayer(object): + + + def __init__(self, tokenizer_name_or_path, label_file,lowercase=True): + + + #load vocab + + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, use_fast=True,do_lower_case=lowercase) + self.label_2_index={} + self.index_2_label={} + self.label_table_size=0 + self.load_label_vocab(label_file,self.label_2_index,self.index_2_label) + self.label_table_size=len(self.label_2_index) + self.vocab_len=len(self.tokenizer) + + def load_label_vocab(self,fea_file,fea_index,index_2_label): + + fin=open(fea_file,'r',encoding='utf-8') + all_text=fin.read().strip().split('\n') + fin.close() + for i in range(0,len(all_text)): + fea_index[all_text[i]]=i + index_2_label[str(i)]=all_text[i] + + + + def generate_label_list(self,ori_tokens,labels,word_index): #the lable of subtoken is the same with the label of first subtoken + label_list=['O']*len(word_index) + + label_list_index=[] + old_new_token_map=[] + ori_i=0 + for i in range(0,len(word_index)): + if word_index[i]==None: + label_list_index.append(self.label_2_index[label_list[i]]) + else: + label_list[i]=labels[word_index[i]] + label_list_index.append(self.label_2_index[label_list[i]]) + if word_index[i]==ori_i: + old_new_token_map.append(i) + ori_i+=1 + + + bert_text_label=[] + for i in range(0,len(ori_tokens)): + bert_text_label.append([ori_tokens[i],labels[i],old_new_token_map[i]]) + + return label_list_index,bert_text_label + + def generate_label_list_B(self,ori_tokens,labels,word_index): #tonly first subtoken is B, other is I + label_list=['O']*len(word_index) + + label_list_index=[] + old_new_token_map=[] + ori_i=0 + first_index=-1 + i=0 + while i max len + break + return label_list_index,bert_text_label + + def load_data_hugface(self,instances, word_max_len=100, label_type='softmax'): + + x_index=[] + x_seg=[] + x_mask=[] + y_list=[] + bert_text_labels=[] + max_len=0 + over_num=0 + maxT=word_max_len + ave_len=0 + + #print('instances:', instances) + #print('labels:',labels) + + + for sentence in instances: + sentence_text_list=[] + label_list=[] + for j in range(0,len(sentence)): + sentence_text_list.append(sentence[j][0]) + label_list.append(sentence[j][-1]) + + token_result=self.tokenizer( + sentence_text_list, + max_length=word_max_len, + truncation=True,is_split_into_words=True) + + bert_tokens=self.tokenizer.convert_ids_to_tokens(token_result['input_ids']) + word_index=token_result.word_ids(batch_index=0) + ave_len+=len(bert_tokens) + if len(sentence_text_list)>max_len: + max_len=len(sentence_text_list) + if len(bert_tokens)==maxT: + over_num+=1 + + x_index.append(token_result['input_ids']) + x_seg.append(token_result['token_type_ids']) + x_mask.append(token_result['attention_mask']) + + #print('\nsentence_text_list:',len(sentence_text_list),sentence_text_list) + #print('\nlabel:',len(label_list),label_list) + #print('\nword_index:',len(word_index),word_index) + #print('\nbert_tokens:',len(bert_tokens),bert_tokens) + label_list,bert_text_label=self.generate_label_list_B(sentence_text_list,label_list,word_index) # the label list after bert token, ori token/lable/new index + #print('\nlabel list:',len(label_list),label_list) + #print('\nbert_text_label:',len(bert_text_label),bert_text_label) + #sys.exit() + y_list.append(label_list) + #print(y_list) + bert_text_labels.append(bert_text_label) + + + x1_np = pad_sequences(x_index, word_max_len, value=0, padding='post',truncating='post') # right padding + x2_np = pad_sequences(x_seg, word_max_len, value=0, padding='post',truncating='post') + x3_np = pad_sequences(x_mask, word_max_len, value=0, padding='post',truncating='post') + y_np = pad_sequences(y_list, word_max_len, value=0, padding='post',truncating='post') + #print('x1_np:',x1_np) + #print('\nx2_np:',x2_np) + #print('\ny_np:',y_np) + #print('\nbert_text:',bert_text_labels) + # print('bert max len:',max_len,',Over',maxT,':',over_num,'ave len:',ave_len/len(instances),'total:',len(instances)) + + if label_type=='softmax': + y_np = np.expand_dims(y_np, 2) + elif label_type=='crf': + pass + + return [x1_np, x2_np,x3_np], y_np,bert_text_labels + + +if __name__ == '__main__': + pass + + + diff --git a/src_python/GeneNER/restore_index_ner.py b/src_python/GeneNER/restore_index_ner.py index d62a612ad333c4adb8aafc13a780111a127e0ecd..d02077cab2ac171a72d172bea0782fad580ba24e 100644 --- a/src_python/GeneNER/restore_index_ner.py +++ b/src_python/GeneNER/restore_index_ner.py @@ -1,447 +1,447 @@ -# -*- coding: utf-8 -*- -""" -Created on Fri Mar 5 10:40:08 2021 - -@author: luol2 -""" - -# -*- coding: utf-8 -*- -""" -Created on Sun Jun 14 17:19:02 2020 - -@author: luol2 -""" - -import io -import sys - -# from BIO format to entity,list line is sentence, follwing the entity(start, end, text, entity, type) -def NN_BIO_tag_entity(pre_BIO): - sentences=pre_BIO.strip().split('\n\n') - - pre_result=[] - #print(sentences) - for sent in sentences: - tokens=sent.split('\n') - pre_entity=[] - pre_start,pre_end=0,0 - sent_text='' - for i in range(0,len(tokens)): - segs=tokens[i].split('\t') - sent_text+=segs[0]+' ' - if len(segs)<3: - continue - #print(tokens) - # generate prediction entity - if segs[2].startswith('B-')>0: - pre_start=i - pre_type=segs[2][2:] - if i+1>=len(tokens): # the last word - pre_end=i - pre_entity.append([pre_start,pre_end,pre_type]) - else: # non last word - next_seg=tokens[i+1].split('\t') - if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': - pre_end=i - pre_entity.append([pre_start,pre_end,pre_type]) - elif next_seg[2].startswith('I-')>0: - pass - elif segs[2].startswith('I-')>0: - if i==0 and i+10 or next_seg[2]=='O': - pre_end=i - pre_entity.append([pre_start,pre_end,pre_type]) - elif next_seg[2].startswith('I-')>0: - pass - elif i==0 and i+1==len(tokens):# only one word: - pre_start=i - pre_type=segs[2][2:] - pre_end=i - pre_entity.append([pre_start,pre_end,pre_type]) - elif i+1>=len(tokens): # the last word - last_seg=tokens[i-1].split('\t') - if last_seg[2]=='O': - pre_start=i - pre_type=segs[2][2:] - pre_end=i - pre_entity.append([pre_start,pre_end,pre_type]) - elif i+1< len(tokens): # non last word - next_seg=tokens[i+1].split('\t') - last_seg=tokens[i-1].split('\t') - if last_seg[2]=='O': - pre_start=i - pre_type=segs[2][2:] - if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': - pre_end=i - pre_entity.append([pre_start,pre_end,pre_type]) - elif next_seg[2].startswith('I-')>0: - pass - elif segs[2]=='O': - pass - pre_result.append([sent_text.rstrip(),pre_entity]) - - - # print(pre_entity) - return pre_result - -def NN_restore_index_fn(ori_text,file_pre): - - input_result=NN_BIO_tag_entity(file_pre) - #print(input_result) - - - new_sentence='' - restore_result=[] - - sentence_ori=ori_text.lower() - - for sent_ele in input_result: - - #print(pre_lines) -# print(sentence_ori) - if len(sent_ele[1])>0: - #print(pre_lines) - sentence_pre=sent_ele[0].lower() - sentence_pre=sentence_pre.split() - - pre_result=sent_ele[1] - - - restore_sid=0 - restore_eid=0 - each_word_id=[] - - for i in range(0,len(sentence_pre)): - - temp_id=sentence_ori.find(sentence_pre[i]) - if temp_id<0: - #print('ori:',sentence_ori) - print('resotr index error:',sentence_pre[i]) - new_sentence+=sentence_ori[0:temp_id] - - restore_sid=len(new_sentence) - restore_eid=len(new_sentence)+len(sentence_pre[i]) - each_word_id.append([str(restore_sid),str(restore_eid)]) - new_sentence+=sentence_ori[temp_id:temp_id+len(sentence_pre[i])] - sentence_ori=sentence_ori[temp_id+len(sentence_pre[i]):] -# print('each_word:',each_word_id) - for pre_ele in pre_result: - temp_pre_result=[each_word_id[int(pre_ele[0])][0],each_word_id[int(pre_ele[1])][1],pre_ele[2]] - if temp_pre_result not in restore_result: - restore_result.append(temp_pre_result) - else: - sentence_pre=sent_ele[0].lower() - sentence_pre=sentence_pre.split() - - for i in range(0,len(sentence_pre)): - - temp_id=sentence_ori.find(sentence_pre[i]) - if temp_id<0: - print('resotr index error:',sentence_pre[i]) - new_sentence+=sentence_ori[0:temp_id] - new_sentence+=sentence_ori[temp_id:temp_id+len(sentence_pre[i])] - sentence_ori=sentence_ori[temp_id+len(sentence_pre[i]):] - #print('resotre:',restore_result) - return restore_result - -def BERT_BIO_tag_entity(pre_BIO): - sentences=pre_BIO.strip().split('\n\n') - - pre_result=[] - for sent in sentences: - tokens=sent.split('\n') - pre_entity=[] - pre_start,pre_end=0,0 - sent_text='' - for i in range(1,len(tokens)-1): - segs=tokens[i].split('\t') - sent_text+=segs[0]+' ' - # generate prediction entity - if segs[2].startswith('B-')>0: - pre_start=i - pre_type=segs[2][2:] - if i+1>=len(tokens): # the last word - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - else: # non last word - next_seg=tokens[i+1].split('\t') - if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - elif next_seg[2].startswith('I-')>0: - pass - elif segs[2].startswith('I-')>0: - if i==0 and i+10 or next_seg[2]=='O': - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - elif next_seg[2].startswith('I-')>0: - pass - elif i==0 and i+1==len(tokens):# only one word: - pre_start=i - pre_type=segs[2][2:] - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - elif i+1>=len(tokens): # the last word - last_seg=tokens[i-1].split('\t') - if last_seg[2]=='O': - pre_start=i - pre_type=segs[2][2:] - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - elif i+1< len(tokens): # non last word - next_seg=tokens[i+1].split('\t') - last_seg=tokens[i-1].split('\t') - if last_seg[2]=='O': - pre_start=i - pre_type=segs[2][2:] - if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - elif next_seg[2].startswith('I-')>0: - pass - elif segs[2]=='O': - pass - pre_result.append([sent_text.rstrip(),pre_entity]) - - - #print(pre_result) - return pre_result - -def BERT_BIO_tag_entity_revised(pre_BIO): - print('revised version') - sentences=pre_BIO.strip().split('\n\n') - - pre_result=[] - for sent in sentences: - tokens=sent.split('\n') - pre_entity=[] - pre_start,pre_end=0,0 - sent_text='' - for i in range(1,len(tokens)-1): - segs=tokens[i].split('\t') - sent_text+=segs[0]+' ' - # generate prediction entity - if segs[2].startswith('B-')>0: - pre_start=i - pre_type=segs[2][2:] - if i+1>=len(tokens)-1: # the last word - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - else: # non last word - next_seg=tokens[i+1].split('\t') - if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - elif next_seg[2].startswith('I-')>0: - pass - elif segs[2].startswith('I-')>0: - if i==1 and i+10 or next_seg[2]=='O': - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - elif next_seg[2].startswith('I-')>0: - pass - elif i==1 and i+1==len(tokens)-1:# only one word: - pre_start=i - pre_type=segs[2][2:] - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - elif i+1>=len(tokens)-1: # the last word - last_seg=tokens[i-1].split('\t') - if last_seg[2]=='O': - pre_start=i - pre_type=segs[2][2:] - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - elif i+1< len(tokens)-1: # non last word - next_seg=tokens[i+1].split('\t') - last_seg=tokens[i-1].split('\t') - if last_seg[2]=='O': - pre_start=i - pre_type=segs[2][2:] - if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - elif next_seg[2].startswith('I-')>0: - pass - elif segs[2]=='O': - pass - pre_result.append([sent_text.rstrip(),pre_entity]) - - - #print(pre_result) - return pre_result - -# only predict on the first token of the ori word -def BERT_BIO_tag_entity_word(pre_BIO): - sentences=pre_BIO.strip().split('\n\n') - - pre_result=[] - for sent in sentences: - tokens=sent.split('\n') - pre_entity=[] - pre_start,pre_end=0,0 - sent_text='' - i=1 - while i< len(tokens)-1: - # for i in range(1,len(tokens)-1): - segs=tokens[i].split('\t') - sent_text+=segs[0]+' ' - # generate prediction entity - if segs[2].startswith('B-')>0: - pre_start=i - pre_type=segs[2][2:] - if i+1>=len(tokens)-1: # the last word - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - else: # non last word - #pass a word - sub_segs=tokens[i+1].split('\t') - while(sub_segs[0].find('##')==0): - i+=1 - sent_text+=sub_segs[0]+' ' - sub_segs=tokens[i+1].split('\t') - - - next_seg=tokens[i+1].split('\t') - if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - elif next_seg[2].startswith('I-')>0: - pass - elif segs[2].startswith('I-')>0: - if i==1 and i+10 or next_seg[2]=='O': - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - elif next_seg[2].startswith('I-')>0: - pass - elif i==1 and i+1==len(tokens)-1:# only one word: - pre_start=i - pre_type=segs[2][2:] - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - elif i+1>=len(tokens)-1: # the last word - last_seg=tokens[i-1].split('\t') - if last_seg[2]=='O': - pre_start=i - pre_type=segs[2][2:] - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - elif i+1< len(tokens)-1: # non last word - - last_seg=tokens[i-1].split('\t') - if last_seg[2]=='O': - pre_start=i - pre_type=segs[2][2:] - #pass a word - sub_segs=tokens[i+1].split('\t') - while(sub_segs[0].find('##')==0): - i+=1 - sent_text+=sub_segs[0]+' ' - sub_segs=tokens[i+1].split('\t') - next_seg=tokens[i+1].split('\t') - if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': - pre_end=i - pre_entity.append([pre_start-1,pre_end-1,pre_type]) - elif next_seg[2].startswith('I-')>0: - pass - elif segs[2]=='O': - pass - i+=1 - pre_result.append([sent_text.rstrip(),pre_entity]) - - - #print(pre_result) - return pre_result - - -def BERT_restore_index_fn(ori_text,file_pre): - - # input_result=BERT_BIO_tag_entity_revised(file_pre) - input_result=BERT_BIO_tag_entity_word(file_pre) - #print(input_result) - - - new_sentence='' - restore_result=[] - - sentence_ori=ori_text.lower() - - for sent_ele in input_result: - - #print(pre_lines) -# print(sentence_ori) - if len(sent_ele[1])>0: - #print(pre_lines) - sentence_pre=sent_ele[0].lower() - sentence_pre=sentence_pre.split() - - pre_result=sent_ele[1] - - - restore_sid=0 - restore_eid=0 - each_word_id=[] - - - for i in range(0,len(sentence_pre)): - if sentence_pre[i][0:2]=="##": - sentence_pre[i]=sentence_pre[i][2:] - temp_id=sentence_ori.find(sentence_pre[i]) - if temp_id<0: - #print('ori:',sentence_ori) - print('resotr index error:',sentence_pre[i]) - new_sentence+=sentence_ori[0:temp_id] - - restore_sid=len(new_sentence) - restore_eid=len(new_sentence)+len(sentence_pre[i]) - each_word_id.append([str(restore_sid),str(restore_eid)]) - new_sentence+=sentence_ori[temp_id:temp_id+len(sentence_pre[i])] - sentence_ori=sentence_ori[temp_id+len(sentence_pre[i]):] -# print('each_word:',each_word_id) - for pre_ele in pre_result: - temp_pre_result=[each_word_id[int(pre_ele[0])][0],each_word_id[int(pre_ele[1])][1],pre_ele[2]] - if temp_pre_result not in restore_result: - restore_result.append(temp_pre_result) - else: - sentence_pre=sent_ele[0].lower() - sentence_pre=sentence_pre.split() - - for i in range(0,len(sentence_pre)): - if sentence_pre[i][0:2]=="##": - sentence_pre[i]=sentence_pre[i][2:] - temp_id=sentence_ori.find(sentence_pre[i]) - if temp_id<0: - print('resotr index error:',sentence_pre[i]) - new_sentence+=sentence_ori[0:temp_id] - new_sentence+=sentence_ori[temp_id:temp_id+len(sentence_pre[i])] - sentence_ori=sentence_ori[temp_id+len(sentence_pre[i]):] - #print('resotre:',restore_result) - return restore_result -if __name__=='__main__': - path='//panfs/pan1/bionlp/lulab/luoling/OpenBioIE_project/models/' - fin=open(path+'devout_test.txt','r',encoding='utf-8') - file_pre=fin.read() - ori_text="D90A-SOD1 mediated amyotrophic lateral sclerosis: a single founder for all cases with evidence for a Cis-acting disease modifier in the recessive haplotype. More than 100 different heterozygous mutations in copper/zinc superoxide dismutase (SOD1) have been found in patients with amyotrophic lateral sclerosis (ALS), a fatal neurodegenerative disease. Uniquely, D90A-SOD1 has been identified in recessive, dominant and apparently sporadic pedigrees. The phenotype of homozygotes is stereotyped with an extended survival, whereas that of affected heterozygotes varies. The frequency of D90A-SOD1 is 50 times higher in Scandinavia (2.5%) than elsewhere, though ALS prevalence is not raised there. Our earlier study indicated separate founders for recessive and dominant/sporadic ALS and we proposed a disease-modifying factor linked to the recessive mutation. Here we have doubled our sample set and employed novel markers to characterise the mutation's origin and localise any modifying factor. Linkage disequilibrium analysis indicates that D90A homozygotes and heterozygotes share a rare haplotype and are all descended from a single ancient founder (alpha 0.974) c.895 generations ago. Homozygotes arose subsequently only c.63 generations ago (alpha 0.878). Recombination has reduced the region shared by recessive kindreds to 97-265 kb around SOD1, excluding all neighbouring genes. We propose that a cis-acting regulatory polymorphism has arisen close to D90A-SOD1 in the recessive founder, which decreases ALS susceptibility in heterozygotes and slows disease progression." - NN_restore_index_fn(ori_text,file_pre) +# -*- coding: utf-8 -*- +""" +Created on Fri Mar 5 10:40:08 2021 + +@author: luol2 +""" + +# -*- coding: utf-8 -*- +""" +Created on Sun Jun 14 17:19:02 2020 + +@author: luol2 +""" + +import io +import sys + +# from BIO format to entity,list line is sentence, follwing the entity(start, end, text, entity, type) +def NN_BIO_tag_entity(pre_BIO): + sentences=pre_BIO.strip().split('\n\n') + + pre_result=[] + #print(sentences) + for sent in sentences: + tokens=sent.split('\n') + pre_entity=[] + pre_start,pre_end=0,0 + sent_text='' + for i in range(0,len(tokens)): + segs=tokens[i].split('\t') + sent_text+=segs[0]+' ' + if len(segs)<3: + continue + #print(tokens) + # generate prediction entity + if segs[2].startswith('B-')>0: + pre_start=i + pre_type=segs[2][2:] + if i+1>=len(tokens): # the last word + pre_end=i + pre_entity.append([pre_start,pre_end,pre_type]) + else: # non last word + next_seg=tokens[i+1].split('\t') + if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': + pre_end=i + pre_entity.append([pre_start,pre_end,pre_type]) + elif next_seg[2].startswith('I-')>0: + pass + elif segs[2].startswith('I-')>0: + if i==0 and i+10 or next_seg[2]=='O': + pre_end=i + pre_entity.append([pre_start,pre_end,pre_type]) + elif next_seg[2].startswith('I-')>0: + pass + elif i==0 and i+1==len(tokens):# only one word: + pre_start=i + pre_type=segs[2][2:] + pre_end=i + pre_entity.append([pre_start,pre_end,pre_type]) + elif i+1>=len(tokens): # the last word + last_seg=tokens[i-1].split('\t') + if last_seg[2]=='O': + pre_start=i + pre_type=segs[2][2:] + pre_end=i + pre_entity.append([pre_start,pre_end,pre_type]) + elif i+1< len(tokens): # non last word + next_seg=tokens[i+1].split('\t') + last_seg=tokens[i-1].split('\t') + if last_seg[2]=='O': + pre_start=i + pre_type=segs[2][2:] + if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': + pre_end=i + pre_entity.append([pre_start,pre_end,pre_type]) + elif next_seg[2].startswith('I-')>0: + pass + elif segs[2]=='O': + pass + pre_result.append([sent_text.rstrip(),pre_entity]) + + + # print(pre_entity) + return pre_result + +def NN_restore_index_fn(ori_text,file_pre): + + input_result=NN_BIO_tag_entity(file_pre) + #print(input_result) + + + new_sentence='' + restore_result=[] + + sentence_ori=ori_text.lower() + + for sent_ele in input_result: + + #print(pre_lines) +# print(sentence_ori) + if len(sent_ele[1])>0: + #print(pre_lines) + sentence_pre=sent_ele[0].lower() + sentence_pre=sentence_pre.split() + + pre_result=sent_ele[1] + + + restore_sid=0 + restore_eid=0 + each_word_id=[] + + for i in range(0,len(sentence_pre)): + + temp_id=sentence_ori.find(sentence_pre[i]) + if temp_id<0: + #print('ori:',sentence_ori) + print('resotr index error:',sentence_pre[i]) + new_sentence+=sentence_ori[0:temp_id] + + restore_sid=len(new_sentence) + restore_eid=len(new_sentence)+len(sentence_pre[i]) + each_word_id.append([str(restore_sid),str(restore_eid)]) + new_sentence+=sentence_ori[temp_id:temp_id+len(sentence_pre[i])] + sentence_ori=sentence_ori[temp_id+len(sentence_pre[i]):] +# print('each_word:',each_word_id) + for pre_ele in pre_result: + temp_pre_result=[each_word_id[int(pre_ele[0])][0],each_word_id[int(pre_ele[1])][1],pre_ele[2]] + if temp_pre_result not in restore_result: + restore_result.append(temp_pre_result) + else: + sentence_pre=sent_ele[0].lower() + sentence_pre=sentence_pre.split() + + for i in range(0,len(sentence_pre)): + + temp_id=sentence_ori.find(sentence_pre[i]) + if temp_id<0: + print('resotr index error:',sentence_pre[i]) + new_sentence+=sentence_ori[0:temp_id] + new_sentence+=sentence_ori[temp_id:temp_id+len(sentence_pre[i])] + sentence_ori=sentence_ori[temp_id+len(sentence_pre[i]):] + #print('resotre:',restore_result) + return restore_result + +def BERT_BIO_tag_entity(pre_BIO): + sentences=pre_BIO.strip().split('\n\n') + + pre_result=[] + for sent in sentences: + tokens=sent.split('\n') + pre_entity=[] + pre_start,pre_end=0,0 + sent_text='' + for i in range(1,len(tokens)-1): + segs=tokens[i].split('\t') + sent_text+=segs[0]+' ' + # generate prediction entity + if segs[2].startswith('B-')>0: + pre_start=i + pre_type=segs[2][2:] + if i+1>=len(tokens): # the last word + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + else: # non last word + next_seg=tokens[i+1].split('\t') + if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + elif next_seg[2].startswith('I-')>0: + pass + elif segs[2].startswith('I-')>0: + if i==0 and i+10 or next_seg[2]=='O': + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + elif next_seg[2].startswith('I-')>0: + pass + elif i==0 and i+1==len(tokens):# only one word: + pre_start=i + pre_type=segs[2][2:] + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + elif i+1>=len(tokens): # the last word + last_seg=tokens[i-1].split('\t') + if last_seg[2]=='O': + pre_start=i + pre_type=segs[2][2:] + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + elif i+1< len(tokens): # non last word + next_seg=tokens[i+1].split('\t') + last_seg=tokens[i-1].split('\t') + if last_seg[2]=='O': + pre_start=i + pre_type=segs[2][2:] + if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + elif next_seg[2].startswith('I-')>0: + pass + elif segs[2]=='O': + pass + pre_result.append([sent_text.rstrip(),pre_entity]) + + + #print(pre_result) + return pre_result + +def BERT_BIO_tag_entity_revised(pre_BIO): + print('revised version') + sentences=pre_BIO.strip().split('\n\n') + + pre_result=[] + for sent in sentences: + tokens=sent.split('\n') + pre_entity=[] + pre_start,pre_end=0,0 + sent_text='' + for i in range(1,len(tokens)-1): + segs=tokens[i].split('\t') + sent_text+=segs[0]+' ' + # generate prediction entity + if segs[2].startswith('B-')>0: + pre_start=i + pre_type=segs[2][2:] + if i+1>=len(tokens)-1: # the last word + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + else: # non last word + next_seg=tokens[i+1].split('\t') + if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + elif next_seg[2].startswith('I-')>0: + pass + elif segs[2].startswith('I-')>0: + if i==1 and i+10 or next_seg[2]=='O': + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + elif next_seg[2].startswith('I-')>0: + pass + elif i==1 and i+1==len(tokens)-1:# only one word: + pre_start=i + pre_type=segs[2][2:] + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + elif i+1>=len(tokens)-1: # the last word + last_seg=tokens[i-1].split('\t') + if last_seg[2]=='O': + pre_start=i + pre_type=segs[2][2:] + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + elif i+1< len(tokens)-1: # non last word + next_seg=tokens[i+1].split('\t') + last_seg=tokens[i-1].split('\t') + if last_seg[2]=='O': + pre_start=i + pre_type=segs[2][2:] + if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + elif next_seg[2].startswith('I-')>0: + pass + elif segs[2]=='O': + pass + pre_result.append([sent_text.rstrip(),pre_entity]) + + + #print(pre_result) + return pre_result + +# only predict on the first token of the ori word +def BERT_BIO_tag_entity_word(pre_BIO): + sentences=pre_BIO.strip().split('\n\n') + + pre_result=[] + for sent in sentences: + tokens=sent.split('\n') + pre_entity=[] + pre_start,pre_end=0,0 + sent_text='' + i=1 + while i< len(tokens)-1: + # for i in range(1,len(tokens)-1): + segs=tokens[i].split('\t') + sent_text+=segs[0]+' ' + # generate prediction entity + if segs[2].startswith('B-')>0: + pre_start=i + pre_type=segs[2][2:] + if i+1>=len(tokens)-1: # the last word + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + else: # non last word + #pass a word + sub_segs=tokens[i+1].split('\t') + while(sub_segs[0].find('##')==0): + i+=1 + sent_text+=sub_segs[0]+' ' + sub_segs=tokens[i+1].split('\t') + + + next_seg=tokens[i+1].split('\t') + if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + elif next_seg[2].startswith('I-')>0: + pass + elif segs[2].startswith('I-')>0: + if i==1 and i+10 or next_seg[2]=='O': + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + elif next_seg[2].startswith('I-')>0: + pass + elif i==1 and i+1==len(tokens)-1:# only one word: + pre_start=i + pre_type=segs[2][2:] + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + elif i+1>=len(tokens)-1: # the last word + last_seg=tokens[i-1].split('\t') + if last_seg[2]=='O': + pre_start=i + pre_type=segs[2][2:] + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + elif i+1< len(tokens)-1: # non last word + + last_seg=tokens[i-1].split('\t') + if last_seg[2]=='O': + pre_start=i + pre_type=segs[2][2:] + #pass a word + sub_segs=tokens[i+1].split('\t') + while(sub_segs[0].find('##')==0): + i+=1 + sent_text+=sub_segs[0]+' ' + sub_segs=tokens[i+1].split('\t') + next_seg=tokens[i+1].split('\t') + if next_seg[2].startswith('B-')>0 or next_seg[2]=='O': + pre_end=i + pre_entity.append([pre_start-1,pre_end-1,pre_type]) + elif next_seg[2].startswith('I-')>0: + pass + elif segs[2]=='O': + pass + i+=1 + pre_result.append([sent_text.rstrip(),pre_entity]) + + + #print(pre_result) + return pre_result + + +def BERT_restore_index_fn(ori_text,file_pre): + + # input_result=BERT_BIO_tag_entity_revised(file_pre) + input_result=BERT_BIO_tag_entity_word(file_pre) + #print(input_result) + + + new_sentence='' + restore_result=[] + + sentence_ori=ori_text.lower() + + for sent_ele in input_result: + + #print(pre_lines) +# print(sentence_ori) + if len(sent_ele[1])>0: + #print(pre_lines) + sentence_pre=sent_ele[0].lower() + sentence_pre=sentence_pre.split() + + pre_result=sent_ele[1] + + + restore_sid=0 + restore_eid=0 + each_word_id=[] + + + for i in range(0,len(sentence_pre)): + if sentence_pre[i][0:2]=="##": + sentence_pre[i]=sentence_pre[i][2:] + temp_id=sentence_ori.find(sentence_pre[i]) + if temp_id<0: + #print('ori:',sentence_ori) + print('resotr index error:',sentence_pre[i]) + new_sentence+=sentence_ori[0:temp_id] + + restore_sid=len(new_sentence) + restore_eid=len(new_sentence)+len(sentence_pre[i]) + each_word_id.append([str(restore_sid),str(restore_eid)]) + new_sentence+=sentence_ori[temp_id:temp_id+len(sentence_pre[i])] + sentence_ori=sentence_ori[temp_id+len(sentence_pre[i]):] +# print('each_word:',each_word_id) + for pre_ele in pre_result: + temp_pre_result=[each_word_id[int(pre_ele[0])][0],each_word_id[int(pre_ele[1])][1],pre_ele[2]] + if temp_pre_result not in restore_result: + restore_result.append(temp_pre_result) + else: + sentence_pre=sent_ele[0].lower() + sentence_pre=sentence_pre.split() + + for i in range(0,len(sentence_pre)): + if sentence_pre[i][0:2]=="##": + sentence_pre[i]=sentence_pre[i][2:] + temp_id=sentence_ori.find(sentence_pre[i]) + if temp_id<0: + print('resotr index error:',sentence_pre[i]) + new_sentence+=sentence_ori[0:temp_id] + new_sentence+=sentence_ori[temp_id:temp_id+len(sentence_pre[i])] + sentence_ori=sentence_ori[temp_id+len(sentence_pre[i]):] + #print('resotre:',restore_result) + return restore_result +if __name__=='__main__': + path='//panfs/pan1/bionlp/lulab/luoling/OpenBioIE_project/models/' + fin=open(path+'devout_test.txt','r',encoding='utf-8') + file_pre=fin.read() + ori_text="D90A-SOD1 mediated amyotrophic lateral sclerosis: a single founder for all cases with evidence for a Cis-acting disease modifier in the recessive haplotype. More than 100 different heterozygous mutations in copper/zinc superoxide dismutase (SOD1) have been found in patients with amyotrophic lateral sclerosis (ALS), a fatal neurodegenerative disease. Uniquely, D90A-SOD1 has been identified in recessive, dominant and apparently sporadic pedigrees. The phenotype of homozygotes is stereotyped with an extended survival, whereas that of affected heterozygotes varies. The frequency of D90A-SOD1 is 50 times higher in Scandinavia (2.5%) than elsewhere, though ALS prevalence is not raised there. Our earlier study indicated separate founders for recessive and dominant/sporadic ALS and we proposed a disease-modifying factor linked to the recessive mutation. Here we have doubled our sample set and employed novel markers to characterise the mutation's origin and localise any modifying factor. Linkage disequilibrium analysis indicates that D90A homozygotes and heterozygotes share a rare haplotype and are all descended from a single ancient founder (alpha 0.974) c.895 generations ago. Homozygotes arose subsequently only c.63 generations ago (alpha 0.878). Recombination has reduced the region shared by recessive kindreds to 97-265 kb around SOD1, excluding all neighbouring genes. We propose that a cis-acting regulatory polymorphism has arisen close to D90A-SOD1 in the recessive founder, which decreases ALS susceptibility in heterozygotes and slows disease progression." + NN_restore_index_fn(ori_text,file_pre) diff --git a/src_python/SpeAss/Evaluation_sa.py b/src_python/SpeAss/Evaluation_sa.py index 02914011a7531e104f8cd375e47b3cd92a30eaea..ef83ac4ce9a1fdf1416180ecfedb8d3a6570fdf7 100644 --- a/src_python/SpeAss/Evaluation_sa.py +++ b/src_python/SpeAss/Evaluation_sa.py @@ -1,396 +1,396 @@ -# -*- coding: utf-8 -*- -""" -Created on Mon Mar 1 15:33:54 2021 - -@author: luol2 -""" -# compute metrics using IO prefile -#ignore arg1 -def Rel_Evaluation(prefile): - fin=open(prefile,'r',encoding='utf-8') - all_in=fin.read().strip().split('\n\n') - fin.close() - TP=0 #gold=pre=pos - FP=0 #gold=neg, pre=pos - FN=0 #gold=pos, pre=Neg - for sentence in all_in: - tokens=sentence.split('\n') - entity_id=0 - token_id=0 - temp_gold='O' - temp_pre='O' - while (token_id': - if seg[1]=='O': - temp_gold=seg[1] - else: - temp_gold=seg[1][2:] - if seg[2]=='O': - temp_pre=seg[2] - else: - temp_pre=seg[2][2:] - token_id+=1 - seg=tokens[token_id].split('\t') - while seg[0]!='': - token_id+=1 - seg=tokens[token_id].split('\t') - if seg[1]!='O' and temp_gold=='O': - temp_gold=seg[1][2:] - if seg[2]!='O' and temp_pre=='O': - temp_pre=seg[2][2:] - if temp_pre!='O' and temp_gold!='O' and temp_pre==temp_gold: - TP+=1 - elif temp_pre!='O' and temp_gold!='O' and temp_pre!=temp_gold: - FP+=1 - FN+=1 - elif temp_pre!='O' and temp_gold=='O' : - FP+=1 - elif temp_pre=='O' and temp_gold!='O' : - FN+=1 - temp_pre='O' - temp_gold='O' - - else: - pass - token_id+=1 - # print('TP,FP,FN:',TP,FP,FN) - if TP+FP==0: - P=0 - else: - P=TP/(TP+FP) - if TP+FN==0: - R=0 - else: - R=TP/(TP+FN) - if P+R==0: - F1=0 - else: - F1=2*P*R/(P+R) - print('TP,FP,FN:',TP,FP,FN) - print('P,R,F1:',P,R,F1) - - -def Rel_Evaluation_fn(prefile): - fin=open(prefile,'r',encoding='utf-8') - all_in=fin.read().strip().split('\n\n') - fin.close() - TP=0 #gold=pre=pos - FP=0 #gold=neg, pre=pos - FN=0 #gold=pos, pre=Neg - for sentence in all_in: - tokens=sentence.split('\n') - entity_id=0 - token_id=0 - temp_gold='O' - temp_pre='O' - while (token_id': - if seg[1]=='O': - temp_gold=seg[1] - else: - temp_gold=seg[1][2:] - if seg[2]=='O': - temp_pre=seg[2] - else: - temp_pre=seg[2][2:] - token_id+=1 - seg=tokens[token_id].split('\t') - while seg[0]!='': - token_id+=1 - seg=tokens[token_id].split('\t') - if seg[1]!='O' and temp_gold=='O': - temp_gold=seg[1][2:] - if seg[2]!='O' and temp_pre=='O': - temp_pre=seg[2][2:] - if temp_pre!='O' and temp_gold!='O' and temp_pre==temp_gold: - TP+=1 - elif temp_pre!='O' and temp_gold!='O' and temp_pre!=temp_gold: - FP+=1 - elif temp_pre!='O' and temp_gold=='O' : - FP+=1 - elif temp_pre=='O' and temp_gold!='O' : - FN+=1 - temp_pre='O' - temp_gold='O' - - else: - pass - token_id+=1 - print('TP,FP,FN:',TP,FP,FN) - if TP+FP==0: - P=0 - else: - P=TP/(TP+FP) - if TP+FN==0: - R=0 - else: - R=TP/(TP+FN) - if P+R==0: - F1=0 - else: - F1=2*P*R/(P+R) - # print('TP,FP,FN:',TP,FP,FN) - print('P,R,F1:',P,R,F1) - return F1 - -def Rel_Evaluation_Hugface_fn(prefile,ARG2_label='gene1s'): - fin=open(prefile,'r',encoding='utf-8') - all_in=fin.read().strip().split('\n\n') - fin.close() - TP=0 #gold=pre=pos - FP=0 #gold=neg, pre=pos - FN=0 #gold=pos, pre=Neg - result_dict={}#{'rel type':[TP,FP,FN],...,} - for sentence in all_in: - tokens=sentence.split('\n') - for token in tokens: - seg=token.split('\t') - if seg[0]==ARG2_label: - if seg[1].find('ARG2')>=0: - if seg[2]==seg[1]: - if seg[1] not in result_dict.keys(): - result_dict[seg[1]]=[1,0,0] - else: - result_dict[seg[1]][0]+=1 - TP+=1 - elif seg[2].find('ARG2')>=0: - if seg[1] not in result_dict.keys(): - result_dict[seg[1]]=[0,0,1] - else: - result_dict[seg[1]][2]+=1 - if seg[2] not in result_dict.keys(): - result_dict[seg[2]]=[0,1,0] - else: - result_dict[seg[2]][1]+=1 - FP+=1 - FN+=1 - else: - if seg[1] not in result_dict.keys(): - result_dict[seg[1]]=[0,0,1] - else: - result_dict[seg[1]][2]+=1 - FN+=1 - - else: - if seg[2].find('ARG2')>=0: - if seg[2] not in result_dict.keys(): - result_dict[seg[2]]=[0,1,0] - else: - result_dict[seg[2]][1]+=1 - FP+=1 - # print('TP,FP,FN:',TP,FP,FN) - rel_metrics={} - for rel_type in result_dict.keys(): - if result_dict[rel_type][0]+result_dict[rel_type][1]==0: - p=0 - else: - p=result_dict[rel_type][0]/(result_dict[rel_type][0]+result_dict[rel_type][1]) - if result_dict[rel_type][0]+result_dict[rel_type][2]==0: - r=0 - else: - r=result_dict[rel_type][0]/(result_dict[rel_type][0]+result_dict[rel_type][2]) - if p+r==0: - f1=0 - else: - f1=2*p*r/(p+r) - rel_metrics[rel_type]=[round(p,4),round(r,4),round(f1,4)] - if TP+FP==0: - P=0 - else: - P=TP/(TP+FP) - if TP+FN==0: - R=0 - else: - R=TP/(TP+FN) - if P+R==0: - F1=0 - else: - F1=2*P*R/(P+R) - P=round(P,4) - R=round(R,4) - F1=round(F1,4) - print('mertics:\n',rel_metrics) - print('\nTP,FP,FN:',TP,FP,FN) - print('Overall P,R,F1:',P,R,F1) - return [P,R,F1],rel_metrics - -def Rel_Evaluation_AIO_fn(prefile): - fin=open(prefile,'r',encoding='utf-8') - all_in=fin.read().strip().split('\n\n') - fin.close() - TP=0 #gold=pre=pos - FP=0 #gold=neg, pre=pos - FN=0 #gold=pos, pre=Neg - for sentence in all_in: - tokens=sentence.split('\n') - for token in tokens: - seg=token.split('\t') - if seg[0]=='': - if seg[1].find('ARG2-')>=0: - if seg[2]==seg[1]: - TP+=1 - elif seg[2].find('ARG2-')>=0: - FP+=1 - FN+=1 - else: - FN+=1 - - else: - if seg[2].find('ARG2-')>=0: - FP+=1 - # print('TP,FP,FN:',TP,FP,FN) - if TP+FP==0: - P=0 - else: - P=TP/(TP+FP) - if TP+FN==0: - R=0 - else: - R=TP/(TP+FN) - if P+R==0: - F1=0 - else: - F1=2*P*R/(P+R) - P=round(P,4) - R=round(R,4) - F1=round(F1,4) - print('TP,FP,FN:',TP,FP,FN) - print('P,R,F1:',P,R,F1) - return [P,R,F1] - -def Rel_Evaluation_AIO_GC_fn(prefile): - fin=open(prefile,'r',encoding='utf-8') - all_in=fin.read().strip().split('\n\n') - fin.close() - TP=0 #gold=pre=pos - FP=0 #gold=neg, pre=pos - FN=0 #gold=pos, pre=Neg - for sentence in all_in: - tokens=sentence.split('\n') - for token in tokens: - seg=token.split('\t') - if seg[0]=='': - if seg[1].find('ARG2-')>=0: - if seg[2]==seg[1]: - TP+=1 - elif seg[2].find('ARG2-')>=0: - FP+=1 - FN+=1 - else: - FN+=1 - - else: - if seg[2].find('ARG2-')>=0: - FP+=1 - # print('TP,FP,FN:',TP,FP,FN) - if TP+FP==0: - P=0 - else: - P=TP/(TP+FP) - if TP+FN==0: - R=0 - else: - R=TP/(TP+FN) - if P+R==0: - F1=0 - else: - F1=2*P*R/(P+R) - P=round(P,4) - R=round(R,4) - F1=round(F1,4) - print('TP,FP,FN:',TP,FP,FN) - print('P,R,F1:',P,R,F1) - return [P,R,F1] - -def office_evaluation(goldfile,prefile): - fin_gold=open(goldfile,'r',encoding='utf-8') - all_gold=fin_gold.read().strip().split('\n') - fin_gold.close() - fin_pre=open(prefile,'r',encoding='utf-8') - all_pre=fin_pre.read().strip().split('\n') - fin_pre.close() - - gold_result={}#{'relation type':set(line)} - pre_result={} - all_result={} #{'relation type':[tp,fp,fn]} - for line in all_gold: - seg=line.split('\t') - if seg[1] not in all_result.keys(): - all_result[seg[1]]=[0,0,0] - if seg[1] not in gold_result.keys(): - gold_result[seg[1]]=set() - gold_result[seg[1]].add(line) - else: - gold_result[seg[1]].add(line) - - for line in all_pre: - seg=line.split('\t') - if seg[1] not in pre_result.keys(): - pre_result[seg[1]]=set() - pre_result[seg[1]].add(line) - else: - pre_result[seg[1]].add(line) - - for rel_type in gold_result.keys(): - for gold_ele in gold_result[rel_type]: - if rel_type not in pre_result.keys(): - all_result[rel_type][2]+=1 - else: - if gold_ele in pre_result[rel_type]: - all_result[rel_type][0]+=1 - else: - all_result[rel_type][2]+=1 - if rel_type in pre_result.keys(): - for pre_ele in pre_result[rel_type]: - if pre_ele not in gold_result[rel_type]: - all_result[rel_type][1]+=1 - ave_f=0 - TP,FP,FN=0,0,0 - print(all_result) - for rel_type in all_result.keys(): - TP+=all_result[rel_type][0] - FP+=all_result[rel_type][1] - FN+=all_result[rel_type][2] - tem_p,tem_r,tem_f=0,0,0 - if all_result[rel_type][0]+all_result[rel_type][1]==0: - tem_p=0 - else: - tem_p=all_result[rel_type][0]/(all_result[rel_type][0]+all_result[rel_type][1]) - if all_result[rel_type][0]+all_result[rel_type][2]==0: - tem_r=0 - else: - tem_r=all_result[rel_type][0]/(all_result[rel_type][0]+all_result[rel_type][2]) - if tem_p+tem_r==0: - tem_f=0 - else: - tem_f=2*tem_p*tem_r/(tem_p+tem_r) - ave_f+=tem_f - print('%s:p=%.4f,r=%.4f,f=%.4f' % (rel_type,tem_p,tem_r,tem_f)) - - if TP+FP==0: - P=0 - else: - P=TP/(TP+FP) - if TP+FN==0: - R=0 - else: - R=TP/(TP+FN) - if P+R==0: - F1=0 - else: - F1=2*P*R/(P+R) - ave_f+=tem_f - - print('Overall:') - print('ave_f1:',ave_f/len(all_result)) - print('TP=%d, FP=%d, FN=%d'%(TP,FP,FN)) - print('P=%.4f, R=%.4f, F1=%.4f'%(P,R,F1)) - - -if __name__=='__main__': - path='//panfs/pan1/bionlplab/luol2/BC7DrugProt/results/' - office_evaluation(path+'dev/dev_gold_relations.tsv',path+'drugprot_dev_LSTM-CRF-ES_pre.tsv') - print('............') - Rel_Evaluation_check('//panfs/pan1/bionlplab/luol2/BC7DrugProt/check/dev_pre_temp.conll') +# -*- coding: utf-8 -*- +""" +Created on Mon Mar 1 15:33:54 2021 + +@author: luol2 +""" +# compute metrics using IO prefile +#ignore arg1 +def Rel_Evaluation(prefile): + fin=open(prefile,'r',encoding='utf-8') + all_in=fin.read().strip().split('\n\n') + fin.close() + TP=0 #gold=pre=pos + FP=0 #gold=neg, pre=pos + FN=0 #gold=pos, pre=Neg + for sentence in all_in: + tokens=sentence.split('\n') + entity_id=0 + token_id=0 + temp_gold='O' + temp_pre='O' + while (token_id': + if seg[1]=='O': + temp_gold=seg[1] + else: + temp_gold=seg[1][2:] + if seg[2]=='O': + temp_pre=seg[2] + else: + temp_pre=seg[2][2:] + token_id+=1 + seg=tokens[token_id].split('\t') + while seg[0]!='': + token_id+=1 + seg=tokens[token_id].split('\t') + if seg[1]!='O' and temp_gold=='O': + temp_gold=seg[1][2:] + if seg[2]!='O' and temp_pre=='O': + temp_pre=seg[2][2:] + if temp_pre!='O' and temp_gold!='O' and temp_pre==temp_gold: + TP+=1 + elif temp_pre!='O' and temp_gold!='O' and temp_pre!=temp_gold: + FP+=1 + FN+=1 + elif temp_pre!='O' and temp_gold=='O' : + FP+=1 + elif temp_pre=='O' and temp_gold!='O' : + FN+=1 + temp_pre='O' + temp_gold='O' + + else: + pass + token_id+=1 + # print('TP,FP,FN:',TP,FP,FN) + if TP+FP==0: + P=0 + else: + P=TP/(TP+FP) + if TP+FN==0: + R=0 + else: + R=TP/(TP+FN) + if P+R==0: + F1=0 + else: + F1=2*P*R/(P+R) + print('TP,FP,FN:',TP,FP,FN) + print('P,R,F1:',P,R,F1) + + +def Rel_Evaluation_fn(prefile): + fin=open(prefile,'r',encoding='utf-8') + all_in=fin.read().strip().split('\n\n') + fin.close() + TP=0 #gold=pre=pos + FP=0 #gold=neg, pre=pos + FN=0 #gold=pos, pre=Neg + for sentence in all_in: + tokens=sentence.split('\n') + entity_id=0 + token_id=0 + temp_gold='O' + temp_pre='O' + while (token_id': + if seg[1]=='O': + temp_gold=seg[1] + else: + temp_gold=seg[1][2:] + if seg[2]=='O': + temp_pre=seg[2] + else: + temp_pre=seg[2][2:] + token_id+=1 + seg=tokens[token_id].split('\t') + while seg[0]!='': + token_id+=1 + seg=tokens[token_id].split('\t') + if seg[1]!='O' and temp_gold=='O': + temp_gold=seg[1][2:] + if seg[2]!='O' and temp_pre=='O': + temp_pre=seg[2][2:] + if temp_pre!='O' and temp_gold!='O' and temp_pre==temp_gold: + TP+=1 + elif temp_pre!='O' and temp_gold!='O' and temp_pre!=temp_gold: + FP+=1 + elif temp_pre!='O' and temp_gold=='O' : + FP+=1 + elif temp_pre=='O' and temp_gold!='O' : + FN+=1 + temp_pre='O' + temp_gold='O' + + else: + pass + token_id+=1 + print('TP,FP,FN:',TP,FP,FN) + if TP+FP==0: + P=0 + else: + P=TP/(TP+FP) + if TP+FN==0: + R=0 + else: + R=TP/(TP+FN) + if P+R==0: + F1=0 + else: + F1=2*P*R/(P+R) + # print('TP,FP,FN:',TP,FP,FN) + print('P,R,F1:',P,R,F1) + return F1 + +def Rel_Evaluation_Hugface_fn(prefile,ARG2_label='gene1s'): + fin=open(prefile,'r',encoding='utf-8') + all_in=fin.read().strip().split('\n\n') + fin.close() + TP=0 #gold=pre=pos + FP=0 #gold=neg, pre=pos + FN=0 #gold=pos, pre=Neg + result_dict={}#{'rel type':[TP,FP,FN],...,} + for sentence in all_in: + tokens=sentence.split('\n') + for token in tokens: + seg=token.split('\t') + if seg[0]==ARG2_label: + if seg[1].find('ARG2')>=0: + if seg[2]==seg[1]: + if seg[1] not in result_dict.keys(): + result_dict[seg[1]]=[1,0,0] + else: + result_dict[seg[1]][0]+=1 + TP+=1 + elif seg[2].find('ARG2')>=0: + if seg[1] not in result_dict.keys(): + result_dict[seg[1]]=[0,0,1] + else: + result_dict[seg[1]][2]+=1 + if seg[2] not in result_dict.keys(): + result_dict[seg[2]]=[0,1,0] + else: + result_dict[seg[2]][1]+=1 + FP+=1 + FN+=1 + else: + if seg[1] not in result_dict.keys(): + result_dict[seg[1]]=[0,0,1] + else: + result_dict[seg[1]][2]+=1 + FN+=1 + + else: + if seg[2].find('ARG2')>=0: + if seg[2] not in result_dict.keys(): + result_dict[seg[2]]=[0,1,0] + else: + result_dict[seg[2]][1]+=1 + FP+=1 + # print('TP,FP,FN:',TP,FP,FN) + rel_metrics={} + for rel_type in result_dict.keys(): + if result_dict[rel_type][0]+result_dict[rel_type][1]==0: + p=0 + else: + p=result_dict[rel_type][0]/(result_dict[rel_type][0]+result_dict[rel_type][1]) + if result_dict[rel_type][0]+result_dict[rel_type][2]==0: + r=0 + else: + r=result_dict[rel_type][0]/(result_dict[rel_type][0]+result_dict[rel_type][2]) + if p+r==0: + f1=0 + else: + f1=2*p*r/(p+r) + rel_metrics[rel_type]=[round(p,4),round(r,4),round(f1,4)] + if TP+FP==0: + P=0 + else: + P=TP/(TP+FP) + if TP+FN==0: + R=0 + else: + R=TP/(TP+FN) + if P+R==0: + F1=0 + else: + F1=2*P*R/(P+R) + P=round(P,4) + R=round(R,4) + F1=round(F1,4) + print('mertics:\n',rel_metrics) + print('\nTP,FP,FN:',TP,FP,FN) + print('Overall P,R,F1:',P,R,F1) + return [P,R,F1],rel_metrics + +def Rel_Evaluation_AIO_fn(prefile): + fin=open(prefile,'r',encoding='utf-8') + all_in=fin.read().strip().split('\n\n') + fin.close() + TP=0 #gold=pre=pos + FP=0 #gold=neg, pre=pos + FN=0 #gold=pos, pre=Neg + for sentence in all_in: + tokens=sentence.split('\n') + for token in tokens: + seg=token.split('\t') + if seg[0]=='': + if seg[1].find('ARG2-')>=0: + if seg[2]==seg[1]: + TP+=1 + elif seg[2].find('ARG2-')>=0: + FP+=1 + FN+=1 + else: + FN+=1 + + else: + if seg[2].find('ARG2-')>=0: + FP+=1 + # print('TP,FP,FN:',TP,FP,FN) + if TP+FP==0: + P=0 + else: + P=TP/(TP+FP) + if TP+FN==0: + R=0 + else: + R=TP/(TP+FN) + if P+R==0: + F1=0 + else: + F1=2*P*R/(P+R) + P=round(P,4) + R=round(R,4) + F1=round(F1,4) + print('TP,FP,FN:',TP,FP,FN) + print('P,R,F1:',P,R,F1) + return [P,R,F1] + +def Rel_Evaluation_AIO_GC_fn(prefile): + fin=open(prefile,'r',encoding='utf-8') + all_in=fin.read().strip().split('\n\n') + fin.close() + TP=0 #gold=pre=pos + FP=0 #gold=neg, pre=pos + FN=0 #gold=pos, pre=Neg + for sentence in all_in: + tokens=sentence.split('\n') + for token in tokens: + seg=token.split('\t') + if seg[0]=='': + if seg[1].find('ARG2-')>=0: + if seg[2]==seg[1]: + TP+=1 + elif seg[2].find('ARG2-')>=0: + FP+=1 + FN+=1 + else: + FN+=1 + + else: + if seg[2].find('ARG2-')>=0: + FP+=1 + # print('TP,FP,FN:',TP,FP,FN) + if TP+FP==0: + P=0 + else: + P=TP/(TP+FP) + if TP+FN==0: + R=0 + else: + R=TP/(TP+FN) + if P+R==0: + F1=0 + else: + F1=2*P*R/(P+R) + P=round(P,4) + R=round(R,4) + F1=round(F1,4) + print('TP,FP,FN:',TP,FP,FN) + print('P,R,F1:',P,R,F1) + return [P,R,F1] + +def office_evaluation(goldfile,prefile): + fin_gold=open(goldfile,'r',encoding='utf-8') + all_gold=fin_gold.read().strip().split('\n') + fin_gold.close() + fin_pre=open(prefile,'r',encoding='utf-8') + all_pre=fin_pre.read().strip().split('\n') + fin_pre.close() + + gold_result={}#{'relation type':set(line)} + pre_result={} + all_result={} #{'relation type':[tp,fp,fn]} + for line in all_gold: + seg=line.split('\t') + if seg[1] not in all_result.keys(): + all_result[seg[1]]=[0,0,0] + if seg[1] not in gold_result.keys(): + gold_result[seg[1]]=set() + gold_result[seg[1]].add(line) + else: + gold_result[seg[1]].add(line) + + for line in all_pre: + seg=line.split('\t') + if seg[1] not in pre_result.keys(): + pre_result[seg[1]]=set() + pre_result[seg[1]].add(line) + else: + pre_result[seg[1]].add(line) + + for rel_type in gold_result.keys(): + for gold_ele in gold_result[rel_type]: + if rel_type not in pre_result.keys(): + all_result[rel_type][2]+=1 + else: + if gold_ele in pre_result[rel_type]: + all_result[rel_type][0]+=1 + else: + all_result[rel_type][2]+=1 + if rel_type in pre_result.keys(): + for pre_ele in pre_result[rel_type]: + if pre_ele not in gold_result[rel_type]: + all_result[rel_type][1]+=1 + ave_f=0 + TP,FP,FN=0,0,0 + print(all_result) + for rel_type in all_result.keys(): + TP+=all_result[rel_type][0] + FP+=all_result[rel_type][1] + FN+=all_result[rel_type][2] + tem_p,tem_r,tem_f=0,0,0 + if all_result[rel_type][0]+all_result[rel_type][1]==0: + tem_p=0 + else: + tem_p=all_result[rel_type][0]/(all_result[rel_type][0]+all_result[rel_type][1]) + if all_result[rel_type][0]+all_result[rel_type][2]==0: + tem_r=0 + else: + tem_r=all_result[rel_type][0]/(all_result[rel_type][0]+all_result[rel_type][2]) + if tem_p+tem_r==0: + tem_f=0 + else: + tem_f=2*tem_p*tem_r/(tem_p+tem_r) + ave_f+=tem_f + print('%s:p=%.4f,r=%.4f,f=%.4f' % (rel_type,tem_p,tem_r,tem_f)) + + if TP+FP==0: + P=0 + else: + P=TP/(TP+FP) + if TP+FN==0: + R=0 + else: + R=TP/(TP+FN) + if P+R==0: + F1=0 + else: + F1=2*P*R/(P+R) + ave_f+=tem_f + + print('Overall:') + print('ave_f1:',ave_f/len(all_result)) + print('TP=%d, FP=%d, FN=%d'%(TP,FP,FN)) + print('P=%.4f, R=%.4f, F1=%.4f'%(P,R,F1)) + + +if __name__=='__main__': + path='//panfs/pan1/bionlplab/luol2/BC7DrugProt/results/' + office_evaluation(path+'dev/dev_gold_relations.tsv',path+'drugprot_dev_LSTM-CRF-ES_pre.tsv') + print('............') + Rel_Evaluation_check('//panfs/pan1/bionlplab/luol2/BC7DrugProt/check/dev_pre_temp.conll') diff --git a/src_python/SpeAss/SA_Pubtator_Conll.py b/src_python/SpeAss/SA_Pubtator_Conll.py index 0f299f3abd6d5371dea70722f17d1138cc1dfec3..63e68160b988c147adb6305dc9f8a8946441bcca 100644 --- a/src_python/SpeAss/SA_Pubtator_Conll.py +++ b/src_python/SpeAss/SA_Pubtator_Conll.py @@ -1,494 +1,494 @@ -# -*- coding: utf-8 -*- - -import sys -import io -import stanza -# nlp = stanza.Pipeline(lang='en', processors='tokenize,mwt,pos,lemma',package='craft') #package='craft' -nlp = stanza.Pipeline(lang='en', processors={'tokenize': 'spacy'},package='None') #package='craft' -REL_ENT={'arg1':'Species', - 'arg2':'Gene'} - -ENTITY_TAG={'arg1':['arg1s','arg1e'], - 'arg2':['arg2s','arg2e'], - 'gene':['gene1s','gene1e'], - 'species':['species1s','species1e'] - } - -# ssplit token and revise index -def ssplit_token(infile): - fin=open(infile,'r',encoding='utf-8') - fout=io.StringIO() - all_in=fin.read().strip().split('\n\n') - fin.close() - for doc_text in all_in: - lines=doc_text.split('\n') - ori_text=lines[0].split('|t|')[1]+' '+lines[1].split('|a|')[1] - pmid=lines[0].split('|t|')[0] - # print(pmid) - entity_all=[] #[[seg0,seg1,...,],[]] - for i in range(2,len(lines)): - seg=lines[i].split('\t') - entity_all.append(seg) - - #ssplit token - doc_stanza = nlp(ori_text) - token_text='' - for sent in doc_stanza.sentences: - for word in sent.words: - if word.text==' ': - pass - # print('token is blank!') - else: - token_text+=word.text+' ' - #token_text=token_text+' ' #sentence split by four blank - - #ori_index map token_index - index_map=[-1]*len(ori_text) - j=0 - space_list=[' ',chr(160),chr(8201),chr(8194),chr(8197),chr(8202)] #空格有好几种,第一个是常用32,第二个shi 160,8201,8194,8197 - for i in range(0,len(ori_text)): - if ori_text[i] in space_list: - pass - elif ori_text[i]==token_text[j]: - #if i>0 and i<285: - # print('=i,j:',i,j,ori_text[i-1:i+1],token_text[j-1:j+1]) - index_map[i]=j - j+=1 - else: - #if i==283: - # print('!i,j:',i,j,ori_text[i-1:i+1],token_text[j-1:j+1]) - j+=1 - temp_log=j - try: - while(ori_text[i]!=token_text[j]): - j+=1 - except: - print('doc',doc_text) - print('token_text:',token_text) - print('error:',ori_text[i-10:i+10],'i:',ori_text[i],'j:',token_text[temp_log],',',token_text[temp_log-10:temp_log+10]) - print(ord(ori_text[i]),ord(' ')) - sys.exit() - index_map[i]=j - j+=1 - # print(index_map) - # token_text=token_text.replace(' ','') - # print(token_text) - fout.write(token_text+'\n') - for ele in entity_all: - if index_map[int(ele[1])]==-1: - new_ents=index_map[int(ele[1])+1] - else: - new_ents=index_map[int(ele[1])] - if index_map[int(ele[2])-1]==-1: - new_ente=index_map[int(ele[2])-1-1]+1 - else: - new_ente=index_map[int(ele[2])-1]+1 - new_ent=token_text[new_ents:new_ente] - if ele[4]=='Species' or ele[4]=='Gene': - fout.write(ele[0]+'\t'+str(new_ents)+'\t'+str(new_ente)+'\t'+new_ent+'\t'+ele[4]+'\t'+ele[5]+'\n') - else: - # print(ele[4]) - fout.write(ele[0]+'\t'+str(new_ents)+'\t'+str(new_ente)+'\t'+new_ent+'\t'+'Gene'+'\t'+ele[5]+'\n') - fout.write('\n') - return fout.getvalue() - - -def corpus_noNest(token_input): - - fin=io.StringIO(token_input) - fout=io.StringIO() - - documents=fin.read().strip().split('\n\n') - fin.close() - total_entity=0 - over_entity=0 - nest_entity=0 - for doc in documents: - lines=doc.split('\n') - context=lines[0] - entity_list=[] - if len(lines)>1: - doc_result={} - for i in range(1,len(lines)): - segs=lines[i].split('\t') - doc_result[lines[i]]=[int(segs[1]),int(segs[2])] - doc_result=sorted(doc_result.items(), key=lambda kv:(kv[1]), reverse=False) - doc_result_sort=[] - for ele in doc_result: - doc_result_sort.append(ele[0]) - - first_entity=doc_result_sort[0].split('\t') - nest_list=[first_entity] - max_eid=int(first_entity[2]) - total_entity+=len(lines)-2 - for i in range(1,len(doc_result_sort)): - segs=doc_result_sort[i].split('\t') - if int(segs[1])> max_eid: - if len(nest_list)==1: - entity_list.append(nest_list[0]) - nest_list=[] - nest_list.append(segs) - if int(segs[2])>max_eid: - max_eid=int(segs[2]) - else: - # print(nest_list) - nest_entity+=len(nest_list)-1 - tem=find_max_entity(nest_list,context)#find max entity - # if len(tem)>1: - # print('max nest >1:',tem) - entity_list.extend(tem) - nest_list=[] - nest_list.append(segs) - if int(segs[2])>max_eid: - max_eid=int(segs[2]) - - else: - nest_list.append(segs) - over_entity+=1 - if int(segs[2])>max_eid: - max_eid=int(segs[2]) - if nest_list!=[]: - if len(nest_list)==1: - entity_list.append(nest_list[0]) - - else: - tem=find_max_entity(nest_list,context)#find max entity - # if len(tem)>1: - # print('max nest >1:',tem) - entity_list.extend(tem) - fout.write(context+'\n') - for ele in entity_list: - if ele[4]=='Gene': - temp_gene={} - gene_ids=ele[5].split(',') - for gene_id in gene_ids: - temp_id=gene_id[gene_id.find('Species:'):-1] - spe_id=temp_id[len('Species:'):] - temp_gene[temp_id]=int(spe_id) - temp_gene_sort=sorted(temp_gene.items(), key=lambda kv:(kv[1]), reverse=False) - final_gene_id='' - for temp_ele in temp_gene_sort: - final_gene_id+=temp_ele[0]+',' - fout.write('\t'.join(ele[:-1])+'\t'+final_gene_id[:-1]+'\n') - else: - fout.write('\t'.join(ele)+'\n') - fout.write('\n') - # print(total_entity,over_entity, nest_entity) - return fout.getvalue() - -def find_max_entity(nest_list,text): - max_len=0 - final_tem=[] - max_index=0 - for i in range(0, len(nest_list)): - if nest_list[i][4] =='Species': - final_tem.append(nest_list[i]) - else: - cur_len=int(nest_list[i][2])-int(nest_list[i][1]) - if cur_len>max_len: - max_len=cur_len - max_index=i - final_tem.append(nest_list[max_index]) - return final_tem - - -def generate_seq_input(nonest_input,outfile): - - fin=io.StringIO(nonest_input) - fout=open(outfile,'w',encoding='utf-8') - all_in=fin.read().strip().split('\n\n') - fin.close() - - final_input=[] - - for doc in all_in: - lines=doc.split('\n') - token_text=lines[0] - pmid=lines[1].split('\t')[0] - # print(pmid) - #read entity and relation - entity_arg1={} #only entity offset - entity_arg2={} #only entity offset - entity_all=[] #all entity infor - - for i in range(1,len(lines)): - seg=lines[i].split('\t') - if seg[4]==REL_ENT['arg1']: - if seg[-1] in entity_arg1.keys(): - entity_arg1[seg[-1]].append([seg[1],seg[2]]) - else: - entity_arg1[seg[-1]]=[[seg[1],seg[2]]] - elif seg[4]==REL_ENT['arg2']: - temp_spes=seg[-1].split(',') - for ele in temp_spes: - gene_spe_id=ele - if gene_spe_id in entity_arg2.keys(): - entity_arg2[gene_spe_id].append([seg[1],seg[2]]) - else: - entity_arg2[gene_spe_id]=[[seg[1],seg[2]]] - - entity_all.append(seg) - # print('\narg1:',entity_arg1) - # print('\narg2:',entity_arg2) - # print('\nall entity:',entity_all) - # for all arg1 to produce inst - for cur_ele in entity_arg1.keys(): - - #1. ner label text - #check cur_ele in relation? - # print(relation_all.keys()) - if cur_ele in entity_arg2.keys(): #pos instance - rel_ent2=entity_arg2[cur_ele] - ner_text='' - text_sid=0 - #print('nonest:',entity_nonest) - for ele_nonest in entity_all: - ent_id=[ele_nonest[1],ele_nonest[2]] - ent_sid=int(ele_nonest[1]) - ent_eid=int(ele_nonest[2]) - # print('sid,eid:',ent_sid,ent_eid) - ent_text=ele_nonest[3] - ent_type=ele_nonest[4] - if ent_sid>=text_sid: - if ent_id in entity_arg1[cur_ele]: - ner_text+=token_text[text_sid:ent_sid]+' '+ENTITY_TAG['arg1'][0]+' '+ent_text+ ' '+ENTITY_TAG['arg1'][1]+' ' - else: - if ent_id in rel_ent2: #arg2 entity - if ent_type!=REL_ENT['arg2']: - pass - # print('arg2 is error! not ',REL_ENT['arg2'], ele_nonest) - ner_text+=token_text[text_sid:ent_sid]+' '+ENTITY_TAG['arg2'][0]+' '+ent_text+ ' '+ENTITY_TAG['arg2'][1]+' ' - else: - ner_text+=token_text[text_sid:ent_sid]+' '+ENTITY_TAG[ent_type.lower()][0]+' '+ent_text+ ' '+ENTITY_TAG[ent_type.lower()][1]+' ' - text_sid=ent_eid - else: - pass - # print('ner entity error!!!',ele_nonest,text_sid) - ner_text+=token_text[text_sid:] - sen_tokens=ner_text.split() - # print('\nner_text:',ner_text) - - #3 produce pos input - - temp_input=[] - token_id=0 - while token_id =0: - temp_input.append(ENTITY_TAG['arg1'][0]+'\tO') - token_id+=1 - while(sen_tokens[token_id]!=ENTITY_TAG['arg1'][1]): - temp_input.append(sen_tokens[token_id]+'\tO') - token_id+=1 - temp_input.append(ENTITY_TAG['arg1'][1]+'\tO') - elif sen_tokens[token_id].find(ENTITY_TAG['arg2'][0])>=0: - temp_input.append(ENTITY_TAG[REL_ENT['arg2'].lower()][0]+'\tARG2') - token_id+=1 - while(sen_tokens[token_id]!=ENTITY_TAG['arg2'][1]): - temp_input.append(sen_tokens[token_id]+'\tARG2') - token_id+=1 - temp_input.append(ENTITY_TAG[REL_ENT['arg2'].lower()][1]+'\tARG2') - elif sen_tokens[token_id].find(ENTITY_TAG['gene'][0])>=0: - temp_input.append(ENTITY_TAG['gene'][0]+'\tO') - token_id+=1 - while(sen_tokens[token_id]!=ENTITY_TAG['gene'][1]): - temp_input.append(sen_tokens[token_id]+'\tO') - token_id+=1 - temp_input.append(ENTITY_TAG['gene'][1]+'\tO') - elif sen_tokens[token_id].find(ENTITY_TAG['species'][0])>=0: - temp_input.append(ENTITY_TAG['species'][0]+'\tO') - token_id+=1 - while(sen_tokens[token_id]!=ENTITY_TAG['species'][1]): - temp_input.append(sen_tokens[token_id]+'\tO') - token_id+=1 - temp_input.append(ENTITY_TAG['species'][1]+'\tO') - else: - if sen_tokens[token_id]=='': - # print('token is none!error!') - pass - else: - temp_input.append(sen_tokens[token_id]+'\tO') - token_id+=1 - - final_input.append('\n'.join(temp_input)) - - else: #neg instance - ner_text='' - text_sid=0 - #print('nonest:',entity_nonest) - for ele_nonest in entity_all: - ent_id=[ele_nonest[1],ele_nonest[2]] - ent_sid=int(ele_nonest[1]) - ent_eid=int(ele_nonest[2]) - # print('sid,eid:',ent_sid,ent_eid) - ent_text=ele_nonest[3] - ent_type=ele_nonest[4] - if ent_sid>=text_sid: - if ent_id in entity_arg1[cur_ele]: - ner_text+=token_text[text_sid:ent_sid]+' '+ENTITY_TAG['arg1'][0]+' '+ent_text+ ' '+ENTITY_TAG['arg1'][1]+' ' - else: - ner_text+=token_text[text_sid:ent_sid]+' '+ENTITY_TAG[ent_type.lower()][0]+' '+ent_text+ ' '+ENTITY_TAG[ent_type.lower()][1]+' ' - text_sid=ent_eid - else: - pass - # print('ner entity error!!!') - ner_text+=token_text[text_sid:] - sen_tokens=ner_text.split() - # print('\nner_text:',ner_text) - # print('ner_Text') - #3 produce NEG input - - temp_input=[] - token_id=0 - while token_id =0: - temp_input.append(ENTITY_TAG['arg1'][0]+'\tO') - token_id+=1 - while(sen_tokens[token_id]!=ENTITY_TAG['arg1'][1]): - temp_input.append(sen_tokens[token_id]+'\tO') - token_id+=1 - temp_input.append(ENTITY_TAG['arg1'][1]+'\tO') - elif sen_tokens[token_id].find(ENTITY_TAG['gene'][0])>=0: - temp_input.append(ENTITY_TAG['gene'][0]+'\tO') - token_id+=1 - while(sen_tokens[token_id]!=ENTITY_TAG['gene'][1]): - temp_input.append(sen_tokens[token_id]+'\tO') - token_id+=1 - temp_input.append(ENTITY_TAG['gene'][1]+'\tO') - elif sen_tokens[token_id].find(ENTITY_TAG['species'][0])>=0: - temp_input.append(ENTITY_TAG['species'][0]+'\tO') - token_id+=1 - while(sen_tokens[token_id]!=ENTITY_TAG['species'][1]): - temp_input.append(sen_tokens[token_id]+'\tO') - token_id+=1 - temp_input.append(ENTITY_TAG['species'][1]+'\tO') - else: - if sen_tokens[token_id]=='': - print('token is none!error!') - else: - temp_input.append(sen_tokens[token_id]+'\tO') - token_id+=1 - - final_input.append('\n'.join(temp_input)) - # print(entity_nonest) - # sys.exit() - fout.write('\n\n'.join(final_input)) - fout.write('\n') - fout.close() - -def check_entity_pos(line,relations): - - seg=line.split(' ') - stack_ent=[] - # print(seg) - entity_num={'arg1':0,'arg2':0, 'gene':0,'chemical':0} - - temp_arg2=[] - for i in range(0,len(seg)): - if seg[i].find(ENTITY_TAG['gene'][0])>=0: - entity_num['gene']+=1 - stack_ent.append(seg[i]) - elif seg[i].find(ENTITY_TAG['chemical'][0])>=0: - entity_num['chemical']+=1 - stack_ent.append(seg[i]) - # print(stack_ent) - elif seg[i].find(ENTITY_TAG['arg1'][0])>=0: - entity_num['arg1']+=1 - stack_ent.append(seg[i]) - elif seg[i].find(ENTITY_TAG['arg2'][0])>=0: - entity_num['arg2']+=1 - temp_arg2.append(seg[i].split('|')[0]) - stack_ent.append(seg[i]) - elif seg[i].find(ENTITY_TAG['arg1'][1])>=0 or seg[i].find(ENTITY_TAG['arg2'][1])>=0 or seg[i].find(ENTITY_TAG['gene'][1])>=0 or seg[i].find(ENTITY_TAG['chemical'][1])>=0: - stack_ent.pop() - if stack_ent!=[]: - # print('entity no match!',stack_ent) - return(-1,seg,entity_num) - - else: - if entity_num['arg1']!=0: - for arg2_id in relations.keys(): - if arg2_id not in temp_arg2: - # print('\ntemp_arg2:',temp_arg2) - # print('\narg2_id:',arg2_id) - return(0,seg,entity_num) #some arg2 not in sentence - if entity_num['arg2']!=0 and entity_num['arg1']==0: - return(0,seg,entity_num) #only arg2, but no arg1 - return(1,seg,entity_num) - -def check_entity_neg(line): - - seg=line.split(' ') - stack_ent=[] - # print(seg) - entity_num={'arg1':0,'gene':0,'chemical':0} - for i in range(0,len(seg)): - if seg[i].find(ENTITY_TAG['gene'][0])>=0: - entity_num['gene']+=1 - stack_ent.append(seg[i]) - elif seg[i].find(ENTITY_TAG['chemical'][0])>=0: - entity_num['chemical']+=1 - stack_ent.append(seg[i]) - # print(stack_ent) - elif seg[i].find(ENTITY_TAG['arg1'][0])>=0: - entity_num['arg1']+=1 - stack_ent.append(seg[i]) - elif seg[i].find(ENTITY_TAG['arg1'][1])>=0 or seg[i].find(ENTITY_TAG['gene'][1])>=0 or seg[i].find(ENTITY_TAG['chemical'][1])>=0: - stack_ent.pop() - if stack_ent!=[]: - # print('entity no match!',stack_ent) - return(-1,seg,entity_num) - - else: - return(1,seg,entity_num) - -def get_one_entity(nest_list,cur_ent,rel_entity2_id): - max_len=0 - max_entity=[] - final_entity=[] - for i in range(0, len(nest_list)): - if nest_list[i][1]==cur_ent:#current entity - final_entity=[] - max_entity=nest_list[i] - final_entity.append(nest_list[i]) - return(final_entity) - if nest_list[i][1] in rel_entity2_id: #invole rel - final_entity.append(nest_list[i]) - continue - length=int(nest_list[i][4])-int(nest_list[i][3]) - if max_entity==[]: #first entity - max_len=length - max_entity=nest_list[i] - else: - if length>max_len: - if max_entity[2]==REL_ENT['arg1']: - max_len=length - max_entity=nest_list[i] - else: - if nest_list[i][2]==REL_ENT['arg2'] and max_entity[1] not in rel_entity2_id: - max_len=length - max_entity=nest_list[i] - - else: - if nest_list[i][1] in rel_entity2_id: - max_len=length - max_entity=nest_list[i] - elif max_entity[2]==REL_ENT['arg1'] and nest_list[i][2]==REL_ENT['arg2']: - max_len=length - max_entity=nest_list[i] - if final_entity==[]: - final_entity.append(max_entity) - return final_entity - -if __name__=='__main__': - - infile='../../TrainingSet/No505/SA.Train.txt' - outfile='../../TrainingSet/No505/SA.Train.conll' - - #tokenizer - token_input=ssplit_token(infile) - - #filter nest entity - nonest_input=corpus_noNest(token_input) - - # to conll +# -*- coding: utf-8 -*- + +import sys +import io +import stanza +# nlp = stanza.Pipeline(lang='en', processors='tokenize,mwt,pos,lemma',package='craft') #package='craft' +nlp = stanza.Pipeline(lang='en', processors={'tokenize': 'spacy'},package='None') #package='craft' +REL_ENT={'arg1':'Species', + 'arg2':'Gene'} + +ENTITY_TAG={'arg1':['arg1s','arg1e'], + 'arg2':['arg2s','arg2e'], + 'gene':['gene1s','gene1e'], + 'species':['species1s','species1e'] + } + +# ssplit token and revise index +def ssplit_token(infile): + fin=open(infile,'r',encoding='utf-8') + fout=io.StringIO() + all_in=fin.read().strip().split('\n\n') + fin.close() + for doc_text in all_in: + lines=doc_text.split('\n') + ori_text=lines[0].split('|t|')[1]+' '+lines[1].split('|a|')[1] + pmid=lines[0].split('|t|')[0] + # print(pmid) + entity_all=[] #[[seg0,seg1,...,],[]] + for i in range(2,len(lines)): + seg=lines[i].split('\t') + entity_all.append(seg) + + #ssplit token + doc_stanza = nlp(ori_text) + token_text='' + for sent in doc_stanza.sentences: + for word in sent.words: + if word.text==' ': + pass + # print('token is blank!') + else: + token_text+=word.text+' ' + #token_text=token_text+' ' #sentence split by four blank + + #ori_index map token_index + index_map=[-1]*len(ori_text) + j=0 + space_list=[' ',chr(160),chr(8201),chr(8194),chr(8197),chr(8202)] #空格有好几种,第一个是常用32,第二个shi 160,8201,8194,8197 + for i in range(0,len(ori_text)): + if ori_text[i] in space_list: + pass + elif ori_text[i]==token_text[j]: + #if i>0 and i<285: + # print('=i,j:',i,j,ori_text[i-1:i+1],token_text[j-1:j+1]) + index_map[i]=j + j+=1 + else: + #if i==283: + # print('!i,j:',i,j,ori_text[i-1:i+1],token_text[j-1:j+1]) + j+=1 + temp_log=j + try: + while(ori_text[i]!=token_text[j]): + j+=1 + except: + print('doc',doc_text) + print('token_text:',token_text) + print('error:',ori_text[i-10:i+10],'i:',ori_text[i],'j:',token_text[temp_log],',',token_text[temp_log-10:temp_log+10]) + print(ord(ori_text[i]),ord(' ')) + sys.exit() + index_map[i]=j + j+=1 + # print(index_map) + # token_text=token_text.replace(' ','') + # print(token_text) + fout.write(token_text+'\n') + for ele in entity_all: + if index_map[int(ele[1])]==-1: + new_ents=index_map[int(ele[1])+1] + else: + new_ents=index_map[int(ele[1])] + if index_map[int(ele[2])-1]==-1: + new_ente=index_map[int(ele[2])-1-1]+1 + else: + new_ente=index_map[int(ele[2])-1]+1 + new_ent=token_text[new_ents:new_ente] + if ele[4]=='Species' or ele[4]=='Gene': + fout.write(ele[0]+'\t'+str(new_ents)+'\t'+str(new_ente)+'\t'+new_ent+'\t'+ele[4]+'\t'+ele[5]+'\n') + else: + # print(ele[4]) + fout.write(ele[0]+'\t'+str(new_ents)+'\t'+str(new_ente)+'\t'+new_ent+'\t'+'Gene'+'\t'+ele[5]+'\n') + fout.write('\n') + return fout.getvalue() + + +def corpus_noNest(token_input): + + fin=io.StringIO(token_input) + fout=io.StringIO() + + documents=fin.read().strip().split('\n\n') + fin.close() + total_entity=0 + over_entity=0 + nest_entity=0 + for doc in documents: + lines=doc.split('\n') + context=lines[0] + entity_list=[] + if len(lines)>1: + doc_result={} + for i in range(1,len(lines)): + segs=lines[i].split('\t') + doc_result[lines[i]]=[int(segs[1]),int(segs[2])] + doc_result=sorted(doc_result.items(), key=lambda kv:(kv[1]), reverse=False) + doc_result_sort=[] + for ele in doc_result: + doc_result_sort.append(ele[0]) + + first_entity=doc_result_sort[0].split('\t') + nest_list=[first_entity] + max_eid=int(first_entity[2]) + total_entity+=len(lines)-2 + for i in range(1,len(doc_result_sort)): + segs=doc_result_sort[i].split('\t') + if int(segs[1])> max_eid: + if len(nest_list)==1: + entity_list.append(nest_list[0]) + nest_list=[] + nest_list.append(segs) + if int(segs[2])>max_eid: + max_eid=int(segs[2]) + else: + # print(nest_list) + nest_entity+=len(nest_list)-1 + tem=find_max_entity(nest_list,context)#find max entity + # if len(tem)>1: + # print('max nest >1:',tem) + entity_list.extend(tem) + nest_list=[] + nest_list.append(segs) + if int(segs[2])>max_eid: + max_eid=int(segs[2]) + + else: + nest_list.append(segs) + over_entity+=1 + if int(segs[2])>max_eid: + max_eid=int(segs[2]) + if nest_list!=[]: + if len(nest_list)==1: + entity_list.append(nest_list[0]) + + else: + tem=find_max_entity(nest_list,context)#find max entity + # if len(tem)>1: + # print('max nest >1:',tem) + entity_list.extend(tem) + fout.write(context+'\n') + for ele in entity_list: + if ele[4]=='Gene': + temp_gene={} + gene_ids=ele[5].split(',') + for gene_id in gene_ids: + temp_id=gene_id[gene_id.find('Species:'):-1] + spe_id=temp_id[len('Species:'):] + temp_gene[temp_id]=int(spe_id) + temp_gene_sort=sorted(temp_gene.items(), key=lambda kv:(kv[1]), reverse=False) + final_gene_id='' + for temp_ele in temp_gene_sort: + final_gene_id+=temp_ele[0]+',' + fout.write('\t'.join(ele[:-1])+'\t'+final_gene_id[:-1]+'\n') + else: + fout.write('\t'.join(ele)+'\n') + fout.write('\n') + # print(total_entity,over_entity, nest_entity) + return fout.getvalue() + +def find_max_entity(nest_list,text): + max_len=0 + final_tem=[] + max_index=0 + for i in range(0, len(nest_list)): + if nest_list[i][4] =='Species': + final_tem.append(nest_list[i]) + else: + cur_len=int(nest_list[i][2])-int(nest_list[i][1]) + if cur_len>max_len: + max_len=cur_len + max_index=i + final_tem.append(nest_list[max_index]) + return final_tem + + +def generate_seq_input(nonest_input,outfile): + + fin=io.StringIO(nonest_input) + fout=open(outfile,'w',encoding='utf-8') + all_in=fin.read().strip().split('\n\n') + fin.close() + + final_input=[] + + for doc in all_in: + lines=doc.split('\n') + token_text=lines[0] + pmid=lines[1].split('\t')[0] + # print(pmid) + #read entity and relation + entity_arg1={} #only entity offset + entity_arg2={} #only entity offset + entity_all=[] #all entity infor + + for i in range(1,len(lines)): + seg=lines[i].split('\t') + if seg[4]==REL_ENT['arg1']: + if seg[-1] in entity_arg1.keys(): + entity_arg1[seg[-1]].append([seg[1],seg[2]]) + else: + entity_arg1[seg[-1]]=[[seg[1],seg[2]]] + elif seg[4]==REL_ENT['arg2']: + temp_spes=seg[-1].split(',') + for ele in temp_spes: + gene_spe_id=ele + if gene_spe_id in entity_arg2.keys(): + entity_arg2[gene_spe_id].append([seg[1],seg[2]]) + else: + entity_arg2[gene_spe_id]=[[seg[1],seg[2]]] + + entity_all.append(seg) + # print('\narg1:',entity_arg1) + # print('\narg2:',entity_arg2) + # print('\nall entity:',entity_all) + # for all arg1 to produce inst + for cur_ele in entity_arg1.keys(): + + #1. ner label text + #check cur_ele in relation? + # print(relation_all.keys()) + if cur_ele in entity_arg2.keys(): #pos instance + rel_ent2=entity_arg2[cur_ele] + ner_text='' + text_sid=0 + #print('nonest:',entity_nonest) + for ele_nonest in entity_all: + ent_id=[ele_nonest[1],ele_nonest[2]] + ent_sid=int(ele_nonest[1]) + ent_eid=int(ele_nonest[2]) + # print('sid,eid:',ent_sid,ent_eid) + ent_text=ele_nonest[3] + ent_type=ele_nonest[4] + if ent_sid>=text_sid: + if ent_id in entity_arg1[cur_ele]: + ner_text+=token_text[text_sid:ent_sid]+' '+ENTITY_TAG['arg1'][0]+' '+ent_text+ ' '+ENTITY_TAG['arg1'][1]+' ' + else: + if ent_id in rel_ent2: #arg2 entity + if ent_type!=REL_ENT['arg2']: + pass + # print('arg2 is error! not ',REL_ENT['arg2'], ele_nonest) + ner_text+=token_text[text_sid:ent_sid]+' '+ENTITY_TAG['arg2'][0]+' '+ent_text+ ' '+ENTITY_TAG['arg2'][1]+' ' + else: + ner_text+=token_text[text_sid:ent_sid]+' '+ENTITY_TAG[ent_type.lower()][0]+' '+ent_text+ ' '+ENTITY_TAG[ent_type.lower()][1]+' ' + text_sid=ent_eid + else: + pass + # print('ner entity error!!!',ele_nonest,text_sid) + ner_text+=token_text[text_sid:] + sen_tokens=ner_text.split() + # print('\nner_text:',ner_text) + + #3 produce pos input + + temp_input=[] + token_id=0 + while token_id =0: + temp_input.append(ENTITY_TAG['arg1'][0]+'\tO') + token_id+=1 + while(sen_tokens[token_id]!=ENTITY_TAG['arg1'][1]): + temp_input.append(sen_tokens[token_id]+'\tO') + token_id+=1 + temp_input.append(ENTITY_TAG['arg1'][1]+'\tO') + elif sen_tokens[token_id].find(ENTITY_TAG['arg2'][0])>=0: + temp_input.append(ENTITY_TAG[REL_ENT['arg2'].lower()][0]+'\tARG2') + token_id+=1 + while(sen_tokens[token_id]!=ENTITY_TAG['arg2'][1]): + temp_input.append(sen_tokens[token_id]+'\tARG2') + token_id+=1 + temp_input.append(ENTITY_TAG[REL_ENT['arg2'].lower()][1]+'\tARG2') + elif sen_tokens[token_id].find(ENTITY_TAG['gene'][0])>=0: + temp_input.append(ENTITY_TAG['gene'][0]+'\tO') + token_id+=1 + while(sen_tokens[token_id]!=ENTITY_TAG['gene'][1]): + temp_input.append(sen_tokens[token_id]+'\tO') + token_id+=1 + temp_input.append(ENTITY_TAG['gene'][1]+'\tO') + elif sen_tokens[token_id].find(ENTITY_TAG['species'][0])>=0: + temp_input.append(ENTITY_TAG['species'][0]+'\tO') + token_id+=1 + while(sen_tokens[token_id]!=ENTITY_TAG['species'][1]): + temp_input.append(sen_tokens[token_id]+'\tO') + token_id+=1 + temp_input.append(ENTITY_TAG['species'][1]+'\tO') + else: + if sen_tokens[token_id]=='': + # print('token is none!error!') + pass + else: + temp_input.append(sen_tokens[token_id]+'\tO') + token_id+=1 + + final_input.append('\n'.join(temp_input)) + + else: #neg instance + ner_text='' + text_sid=0 + #print('nonest:',entity_nonest) + for ele_nonest in entity_all: + ent_id=[ele_nonest[1],ele_nonest[2]] + ent_sid=int(ele_nonest[1]) + ent_eid=int(ele_nonest[2]) + # print('sid,eid:',ent_sid,ent_eid) + ent_text=ele_nonest[3] + ent_type=ele_nonest[4] + if ent_sid>=text_sid: + if ent_id in entity_arg1[cur_ele]: + ner_text+=token_text[text_sid:ent_sid]+' '+ENTITY_TAG['arg1'][0]+' '+ent_text+ ' '+ENTITY_TAG['arg1'][1]+' ' + else: + ner_text+=token_text[text_sid:ent_sid]+' '+ENTITY_TAG[ent_type.lower()][0]+' '+ent_text+ ' '+ENTITY_TAG[ent_type.lower()][1]+' ' + text_sid=ent_eid + else: + pass + # print('ner entity error!!!') + ner_text+=token_text[text_sid:] + sen_tokens=ner_text.split() + # print('\nner_text:',ner_text) + # print('ner_Text') + #3 produce NEG input + + temp_input=[] + token_id=0 + while token_id =0: + temp_input.append(ENTITY_TAG['arg1'][0]+'\tO') + token_id+=1 + while(sen_tokens[token_id]!=ENTITY_TAG['arg1'][1]): + temp_input.append(sen_tokens[token_id]+'\tO') + token_id+=1 + temp_input.append(ENTITY_TAG['arg1'][1]+'\tO') + elif sen_tokens[token_id].find(ENTITY_TAG['gene'][0])>=0: + temp_input.append(ENTITY_TAG['gene'][0]+'\tO') + token_id+=1 + while(sen_tokens[token_id]!=ENTITY_TAG['gene'][1]): + temp_input.append(sen_tokens[token_id]+'\tO') + token_id+=1 + temp_input.append(ENTITY_TAG['gene'][1]+'\tO') + elif sen_tokens[token_id].find(ENTITY_TAG['species'][0])>=0: + temp_input.append(ENTITY_TAG['species'][0]+'\tO') + token_id+=1 + while(sen_tokens[token_id]!=ENTITY_TAG['species'][1]): + temp_input.append(sen_tokens[token_id]+'\tO') + token_id+=1 + temp_input.append(ENTITY_TAG['species'][1]+'\tO') + else: + if sen_tokens[token_id]=='': + print('token is none!error!') + else: + temp_input.append(sen_tokens[token_id]+'\tO') + token_id+=1 + + final_input.append('\n'.join(temp_input)) + # print(entity_nonest) + # sys.exit() + fout.write('\n\n'.join(final_input)) + fout.write('\n') + fout.close() + +def check_entity_pos(line,relations): + + seg=line.split(' ') + stack_ent=[] + # print(seg) + entity_num={'arg1':0,'arg2':0, 'gene':0,'chemical':0} + + temp_arg2=[] + for i in range(0,len(seg)): + if seg[i].find(ENTITY_TAG['gene'][0])>=0: + entity_num['gene']+=1 + stack_ent.append(seg[i]) + elif seg[i].find(ENTITY_TAG['chemical'][0])>=0: + entity_num['chemical']+=1 + stack_ent.append(seg[i]) + # print(stack_ent) + elif seg[i].find(ENTITY_TAG['arg1'][0])>=0: + entity_num['arg1']+=1 + stack_ent.append(seg[i]) + elif seg[i].find(ENTITY_TAG['arg2'][0])>=0: + entity_num['arg2']+=1 + temp_arg2.append(seg[i].split('|')[0]) + stack_ent.append(seg[i]) + elif seg[i].find(ENTITY_TAG['arg1'][1])>=0 or seg[i].find(ENTITY_TAG['arg2'][1])>=0 or seg[i].find(ENTITY_TAG['gene'][1])>=0 or seg[i].find(ENTITY_TAG['chemical'][1])>=0: + stack_ent.pop() + if stack_ent!=[]: + # print('entity no match!',stack_ent) + return(-1,seg,entity_num) + + else: + if entity_num['arg1']!=0: + for arg2_id in relations.keys(): + if arg2_id not in temp_arg2: + # print('\ntemp_arg2:',temp_arg2) + # print('\narg2_id:',arg2_id) + return(0,seg,entity_num) #some arg2 not in sentence + if entity_num['arg2']!=0 and entity_num['arg1']==0: + return(0,seg,entity_num) #only arg2, but no arg1 + return(1,seg,entity_num) + +def check_entity_neg(line): + + seg=line.split(' ') + stack_ent=[] + # print(seg) + entity_num={'arg1':0,'gene':0,'chemical':0} + for i in range(0,len(seg)): + if seg[i].find(ENTITY_TAG['gene'][0])>=0: + entity_num['gene']+=1 + stack_ent.append(seg[i]) + elif seg[i].find(ENTITY_TAG['chemical'][0])>=0: + entity_num['chemical']+=1 + stack_ent.append(seg[i]) + # print(stack_ent) + elif seg[i].find(ENTITY_TAG['arg1'][0])>=0: + entity_num['arg1']+=1 + stack_ent.append(seg[i]) + elif seg[i].find(ENTITY_TAG['arg1'][1])>=0 or seg[i].find(ENTITY_TAG['gene'][1])>=0 or seg[i].find(ENTITY_TAG['chemical'][1])>=0: + stack_ent.pop() + if stack_ent!=[]: + # print('entity no match!',stack_ent) + return(-1,seg,entity_num) + + else: + return(1,seg,entity_num) + +def get_one_entity(nest_list,cur_ent,rel_entity2_id): + max_len=0 + max_entity=[] + final_entity=[] + for i in range(0, len(nest_list)): + if nest_list[i][1]==cur_ent:#current entity + final_entity=[] + max_entity=nest_list[i] + final_entity.append(nest_list[i]) + return(final_entity) + if nest_list[i][1] in rel_entity2_id: #invole rel + final_entity.append(nest_list[i]) + continue + length=int(nest_list[i][4])-int(nest_list[i][3]) + if max_entity==[]: #first entity + max_len=length + max_entity=nest_list[i] + else: + if length>max_len: + if max_entity[2]==REL_ENT['arg1']: + max_len=length + max_entity=nest_list[i] + else: + if nest_list[i][2]==REL_ENT['arg2'] and max_entity[1] not in rel_entity2_id: + max_len=length + max_entity=nest_list[i] + + else: + if nest_list[i][1] in rel_entity2_id: + max_len=length + max_entity=nest_list[i] + elif max_entity[2]==REL_ENT['arg1'] and nest_list[i][2]==REL_ENT['arg2']: + max_len=length + max_entity=nest_list[i] + if final_entity==[]: + final_entity.append(max_entity) + return final_entity + +if __name__=='__main__': + + infile='../../TrainingSet/No505/SA.Train.txt' + outfile='../../TrainingSet/No505/SA.Train.conll' + + #tokenizer + token_input=ssplit_token(infile) + + #filter nest entity + nonest_input=corpus_noNest(token_input) + + # to conll generate_seq_input(nonest_input,outfile) \ No newline at end of file diff --git a/src_python/SpeAss/ml_tagging_score_sa.py b/src_python/SpeAss/ml_tagging_score_sa.py index e396ca0ed8b233103439137d1a017259006f867c..3a9c4d47eac4fae66dd0c809d3588fb0ba98bbc6 100644 --- a/src_python/SpeAss/ml_tagging_score_sa.py +++ b/src_python/SpeAss/ml_tagging_score_sa.py @@ -1,220 +1,220 @@ -# -*- coding: utf-8 -*- -""" -Created on Fri Jan 7 09:29:46 2022 - -@author: luol2 - -machine learning tagging - -""" - - -import time -import io - -from src_python.SpeAss.processing_data_sa import ml_intext_fn,out_BIO_BERT_softmax_score_fn -import tensorflow as tf -gpu = tf.config.list_physical_devices('GPU') -print("Num GPUs Available: ", len(gpu)) -if len(gpu) > 0: - tf.config.experimental.set_memory_growth(gpu[0], True) -#tf.compat.v1.disable_eager_execution() - -REL_ENT={'arg1':'Species', - 'arg2':'Gene'} - -entity_tag={'arg1':['arg1s','arg1e'], - 'gene':['gene1s','gene1e'], - 'species':['species1s','species1e'] - } - -def input_preprocess_notoken(doc_text): - final_input=[] - final_id=[] - - lines=doc_text.split('\n') - token_text=lines[0] - pmid=lines[1].split('\t')[0] - entity_arg1={} #{species_id:[[spe_sid1,sep_eid1],[...]]} - entity_all=[] - for i in range(1,len(lines)): - seg=lines[i].split('\t') - if seg[6]==REL_ENT['arg1']: - if seg[-1] in entity_arg1.keys(): - entity_arg1[seg[-1]].append([seg[3],seg[4]]) - else: - entity_arg1[seg[-1]]=[[seg[3],seg[4]]] - entity_all.append(seg) - - #print(token_text) - #print(entity_chemical) - #generate input instance - for cur_ele in entity_arg1: - - #2. ner label text - ner_text='' - text_sid=0 - #print('nonest:',entity_nonest) - for ele_nonest in entity_all: - ent_id=[ele_nonest[3],ele_nonest[4]] - ent_spe_id=ele_nonest[-1] - ent_sid=int(ele_nonest[3]) - ent_eid=int(ele_nonest[4]) - # print('sid,eid:',ent_sid,ent_eid) - ent_text=ele_nonest[5] - ent_type=ele_nonest[6] - if ent_sid>=text_sid: - # if token_text[ent_sid:ent_eid]!=ent_text: - # print('error!index_text,entext:',token_text[ent_sid:ent_eid],ent_text) - if ent_id in entity_arg1[cur_ele]: #is species - ner_text+=token_text[text_sid:ent_sid]+' '+ent_spe_id+'|'+entity_tag['arg1'][0]+' '+ent_text+' '+entity_tag['arg1'][1]+' ' - else: - ner_text+=token_text[text_sid:ent_sid]+' '+str(ent_sid)+'-'+str(ent_eid)+'|'+entity_tag[ent_type.lower()][0]+' '+ent_text+' '+entity_tag[ent_type.lower()][1]+' ' - text_sid=ent_eid - ner_text+=token_text[text_sid:] - sen_tokens=ner_text.split() - #print('\nner_text:',ner_text) - - #3. produce input - temp_input=[] - temp_id={'species':'','gene':[]} - for sen_token in sen_tokens: - if sen_token.find(entity_tag['arg1'][0])>=0: - en_id=sen_token.split('|')[0] - temp_id['species']=en_id - temp_input.append(entity_tag['arg1'][0]+'\tO') - elif sen_token.find(entity_tag['gene'][0])>=0: - en_id=sen_token.split('|')[0] - temp_id['gene'].append(en_id) - temp_input.append(entity_tag['gene'][0]+'\tO') - elif sen_token.find(entity_tag['species'][0])>=0: - en_id=sen_token.split('|')[0] - # temp_id.append(en_id) - temp_input.append(entity_tag['species'][0]+'\tO') - else: - if sen_token=='': - # print('token is none!error!') - pass - else: - temp_input.append(sen_token+'\tO') - final_input.append('\n'.join(temp_input)) - final_id.append(temp_id) - - # print(entity_nonest) - return final_input,final_id,entity_all,pmid - - -def ml_tagging(ml_input,nn_model): - - test_set,test_label = ml_intext_fn(ml_input) - test_x,test_y, test_bert_text_label=nn_model.rep.load_data_hugface(test_set,test_label,word_max_len=nn_model.maxlen,label_type='softmax') - test_pre = nn_model.model.predict(test_x) - ml_out=out_BIO_BERT_softmax_score_fn(test_pre,test_bert_text_label,nn_model.rep.index_2_label) - return ml_out - -def output_rel(ml_output,entity_map,pmid): - fin=io.StringIO(ml_output) - alltexts=fin.read().strip().split('\n\n') - fin.close() - final_out={} #{'sid-eid':[spechies id]} - for sen_id,sentence in enumerate(alltexts): - tokens=sentence.split('\n') - gene_entity_id=0 - token_id=0 - arg1='' - arg2_list=[] #[[ID, score],[id,score]] - while (token_id=len(tokens): - break - seg=tokens[token_id].split('\t') - while seg[0]!=entity_tag['arg1'][1]: - token_id+=1 - if token_id >=len(tokens): - break - seg=tokens[token_id].split('\t') - elif seg[0]==entity_tag[REL_ENT['arg2'].lower()][0]: - temp_rel=seg[-2] - temp_score=seg[-1] - arg2_id=entity_map[sen_id]['gene'][gene_entity_id] - gene_entity_id+=1 - token_id+=1 - if token_id >=len(tokens): - break - seg=tokens[token_id].split('\t') - while seg[0]!=entity_tag[REL_ENT['arg2'].lower()][1]: - token_id+=1 - if token_id >=len(tokens): - break - seg=tokens[token_id].split('\t') - if seg[-2].find('ARG2')>=0 and temp_rel.find('ARG2')<0: - temp_rel=seg[-2] - temp_score=seg[-1] - if temp_rel.find('ARG2')>=0: - arg2_list.append([arg2_id,temp_score]) - elif seg[0]==entity_tag[REL_ENT['arg1'].lower()][0]: - token_id+=1 - if token_id >=len(tokens): - break - seg=tokens[token_id].split('\t') - while seg[0]!=entity_tag[REL_ENT['arg1'].lower()][1]: - token_id+=1 - if token_id >=len(tokens): - break - seg=tokens[token_id].split('\t') - - else: - pass - token_id+=1 - #print(arg1,arg2_list) - if arg2_list!=[] and arg1!='': - for arg2_ele in arg2_list: - if arg2_ele[0] not in final_out.keys(): - final_out[arg2_ele[0]]=[arg1+'|'+arg2_ele[1]] - else: - final_out[arg2_ele[0]].append(arg1+'|'+arg2_ele[1]) - return(final_out) - -def NER_Tag(doc_in,nn_model): - - #1. preprocess input, input_text:conll格式, input_entity:相应的实体列表 - #print(doc_in) - input_text,entity_index,entity_all,pmid=input_preprocess_notoken(doc_in) - # print('pmid:',pmid) - # print('\entity_index:',entity_index) - - - #2. ml tagging - if input_text!=[]: - ml_pre=ml_tagging(input_text,nn_model) - #print('\noutput:') - #print(ml_pre) - - #3.generate output - final_output=output_rel(ml_pre,entity_index,pmid) - else: - final_output={} - return final_output,entity_all - - - - - - - - - - - - - - - - - - - - +# -*- coding: utf-8 -*- +""" +Created on Fri Jan 7 09:29:46 2022 + +@author: luol2 + +machine learning tagging + +""" + + +import time +import io + +from src_python.SpeAss.processing_data_sa import ml_intext_fn,out_BIO_BERT_softmax_score_fn +import tensorflow as tf +gpu = tf.config.list_physical_devices('GPU') +print("Num GPUs Available: ", len(gpu)) +if len(gpu) > 0: + tf.config.experimental.set_memory_growth(gpu[0], True) +#tf.compat.v1.disable_eager_execution() + +REL_ENT={'arg1':'Species', + 'arg2':'Gene'} + +entity_tag={'arg1':['arg1s','arg1e'], + 'gene':['gene1s','gene1e'], + 'species':['species1s','species1e'] + } + +def input_preprocess_notoken(doc_text): + final_input=[] + final_id=[] + + lines=doc_text.split('\n') + token_text=lines[0] + pmid=lines[1].split('\t')[0] + entity_arg1={} #{species_id:[[spe_sid1,sep_eid1],[...]]} + entity_all=[] + for i in range(1,len(lines)): + seg=lines[i].split('\t') + if seg[6]==REL_ENT['arg1']: + if seg[-1] in entity_arg1.keys(): + entity_arg1[seg[-1]].append([seg[3],seg[4]]) + else: + entity_arg1[seg[-1]]=[[seg[3],seg[4]]] + entity_all.append(seg) + + #print(token_text) + #print(entity_chemical) + #generate input instance + for cur_ele in entity_arg1: + + #2. ner label text + ner_text='' + text_sid=0 + #print('nonest:',entity_nonest) + for ele_nonest in entity_all: + ent_id=[ele_nonest[3],ele_nonest[4]] + ent_spe_id=ele_nonest[-1] + ent_sid=int(ele_nonest[3]) + ent_eid=int(ele_nonest[4]) + # print('sid,eid:',ent_sid,ent_eid) + ent_text=ele_nonest[5] + ent_type=ele_nonest[6] + if ent_sid>=text_sid: + # if token_text[ent_sid:ent_eid]!=ent_text: + # print('error!index_text,entext:',token_text[ent_sid:ent_eid],ent_text) + if ent_id in entity_arg1[cur_ele]: #is species + ner_text+=token_text[text_sid:ent_sid]+' '+ent_spe_id+'|'+entity_tag['arg1'][0]+' '+ent_text+' '+entity_tag['arg1'][1]+' ' + else: + ner_text+=token_text[text_sid:ent_sid]+' '+str(ent_sid)+'-'+str(ent_eid)+'|'+entity_tag[ent_type.lower()][0]+' '+ent_text+' '+entity_tag[ent_type.lower()][1]+' ' + text_sid=ent_eid + ner_text+=token_text[text_sid:] + sen_tokens=ner_text.split() + #print('\nner_text:',ner_text) + + #3. produce input + temp_input=[] + temp_id={'species':'','gene':[]} + for sen_token in sen_tokens: + if sen_token.find(entity_tag['arg1'][0])>=0: + en_id=sen_token.split('|')[0] + temp_id['species']=en_id + temp_input.append(entity_tag['arg1'][0]+'\tO') + elif sen_token.find(entity_tag['gene'][0])>=0: + en_id=sen_token.split('|')[0] + temp_id['gene'].append(en_id) + temp_input.append(entity_tag['gene'][0]+'\tO') + elif sen_token.find(entity_tag['species'][0])>=0: + en_id=sen_token.split('|')[0] + # temp_id.append(en_id) + temp_input.append(entity_tag['species'][0]+'\tO') + else: + if sen_token=='': + # print('token is none!error!') + pass + else: + temp_input.append(sen_token+'\tO') + final_input.append('\n'.join(temp_input)) + final_id.append(temp_id) + + # print(entity_nonest) + return final_input,final_id,entity_all,pmid + + +def ml_tagging(ml_input,nn_model): + + test_set,test_label = ml_intext_fn(ml_input) + test_x,test_y, test_bert_text_label=nn_model.rep.load_data_hugface(test_set,test_label,word_max_len=nn_model.maxlen,label_type='softmax') + test_pre = nn_model.model.predict(test_x) + ml_out=out_BIO_BERT_softmax_score_fn(test_pre,test_bert_text_label,nn_model.rep.index_2_label) + return ml_out + +def output_rel(ml_output,entity_map,pmid): + fin=io.StringIO(ml_output) + alltexts=fin.read().strip().split('\n\n') + fin.close() + final_out={} #{'sid-eid':[spechies id]} + for sen_id,sentence in enumerate(alltexts): + tokens=sentence.split('\n') + gene_entity_id=0 + token_id=0 + arg1='' + arg2_list=[] #[[ID, score],[id,score]] + while (token_id=len(tokens): + break + seg=tokens[token_id].split('\t') + while seg[0]!=entity_tag['arg1'][1]: + token_id+=1 + if token_id >=len(tokens): + break + seg=tokens[token_id].split('\t') + elif seg[0]==entity_tag[REL_ENT['arg2'].lower()][0]: + temp_rel=seg[-2] + temp_score=seg[-1] + arg2_id=entity_map[sen_id]['gene'][gene_entity_id] + gene_entity_id+=1 + token_id+=1 + if token_id >=len(tokens): + break + seg=tokens[token_id].split('\t') + while seg[0]!=entity_tag[REL_ENT['arg2'].lower()][1]: + token_id+=1 + if token_id >=len(tokens): + break + seg=tokens[token_id].split('\t') + if seg[-2].find('ARG2')>=0 and temp_rel.find('ARG2')<0: + temp_rel=seg[-2] + temp_score=seg[-1] + if temp_rel.find('ARG2')>=0: + arg2_list.append([arg2_id,temp_score]) + elif seg[0]==entity_tag[REL_ENT['arg1'].lower()][0]: + token_id+=1 + if token_id >=len(tokens): + break + seg=tokens[token_id].split('\t') + while seg[0]!=entity_tag[REL_ENT['arg1'].lower()][1]: + token_id+=1 + if token_id >=len(tokens): + break + seg=tokens[token_id].split('\t') + + else: + pass + token_id+=1 + #print(arg1,arg2_list) + if arg2_list!=[] and arg1!='': + for arg2_ele in arg2_list: + if arg2_ele[0] not in final_out.keys(): + final_out[arg2_ele[0]]=[arg1+'|'+arg2_ele[1]] + else: + final_out[arg2_ele[0]].append(arg1+'|'+arg2_ele[1]) + return(final_out) + +def NER_Tag(doc_in,nn_model): + + #1. preprocess input, input_text:conll格式, input_entity:相应的实体列表 + #print(doc_in) + input_text,entity_index,entity_all,pmid=input_preprocess_notoken(doc_in) + # print('pmid:',pmid) + # print('\entity_index:',entity_index) + + + #2. ml tagging + if input_text!=[]: + ml_pre=ml_tagging(input_text,nn_model) + #print('\noutput:') + #print(ml_pre) + + #3.generate output + final_output=output_rel(ml_pre,entity_index,pmid) + else: + final_output={} + return final_output,entity_all + + + + + + + + + + + + + + + + + + + + diff --git a/src_python/SpeAss/model_sa.py b/src_python/SpeAss/model_sa.py index b1b857541f7ff138f1685a3d2354bab4e61784b2..23dd69430b36e5fdb0c02f463ccfbbe40cb3e3aa 100644 --- a/src_python/SpeAss/model_sa.py +++ b/src_python/SpeAss/model_sa.py @@ -1,105 +1,105 @@ -# -*- coding: utf-8 -*- -""" -Created on Wed Feb 10 09:08:09 2021 - -@author: luol2 - -Model Architecture - -""" -import tensorflow as tf -from src_python.SpeAss.represent_sa import Hugface_RepresentationLayer -from tensorflow.keras.layers import * -from tensorflow.keras.models import Model -from tensorflow.keras.optimizers import RMSprop, SGD, Adam, Adadelta, Adagrad,Nadam -from transformers import TFAutoModel -import numpy as np -import sys - - -class LRSchedule_LINEAR(tf.keras.optimizers.schedules.LearningRateSchedule): - def __init__( - self, - init_lr=5e-5, - init_warmup_lr=0.0, - final_lr=5e-7, - warmup_steps=0, - decay_steps=0, - ): - super().__init__() - self.init_lr = init_lr - self.init_warmup_lr=init_warmup_lr - self.final_lr = final_lr - self.warmup_steps = warmup_steps - self.decay_steps = decay_steps - - def __call__(self, step): - """ linear warm up - linear decay """ - if self.warmup_steps>0: - warmup_lr = (self.init_lr - self.init_warmup_lr)/self.warmup_steps * step+self.init_warmup_lr - else: - warmup_lr=1000.0 - #print('\n.......warmup_lr:',warmup_lr) - decay_lr = tf.math.maximum( - self.final_lr, - self.init_lr - (step - self.warmup_steps)/self.decay_steps*(self.init_lr - self.final_lr) - ) - #print('\n.....decay_lr:',decay_lr) - return tf.math.minimum(warmup_lr,decay_lr) - - - -class HUGFACE_NER(): #huggingface transformers - def __init__(self, model_files): - self.model_type='HUGFACE' - self.maxlen = 512 - self.checkpoint_path = model_files['checkpoint_path'] - self.label_file=model_files['labelfile'] - self.lowercase=model_files['lowercase'] - self.rep = Hugface_RepresentationLayer(self.checkpoint_path, self.label_file, lowercase=self.lowercase) - - - def build_encoder(self): - print('...vocab len:',self.rep.vocab_len) - plm_model = TFAutoModel.from_pretrained(self.checkpoint_path, from_pt=True) - plm_model.resize_token_embeddings(self.rep.vocab_len) - x1_in = Input(shape=(self.maxlen,),dtype=tf.int32, name='input_ids') - x2_in = Input(shape=(self.maxlen,),dtype=tf.int32, name='token_type_ids') - x3_in = Input(shape=(self.maxlen,),dtype=tf.int32, name='attention_mask') - x = plm_model(x1_in, token_type_ids=x2_in, attention_mask=x3_in)[0] - #dense = TimeDistributed(Dense(512, activation='relu'), name='dense1')(x) - self.encoder = Model (inputs=[x1_in,x2_in,x3_in], outputs=x,name='hugface_encoder') - self.encoder.summary() - - def build_softmax_decoder(self): - - x1_in = Input(shape=(self.maxlen,),dtype=tf.int32) - x2_in = Input(shape=(self.maxlen,),dtype=tf.int32) - x3_in = Input(shape=(self.maxlen,),dtype=tf.int32) - features = self.encoder([x1_in,x2_in,x3_in]) - #features = Dropout(0.4)(features) - #features = TimeDistributed(Dense(128, activation='relu'), name='dense2')(features) - features= Dropout(0.1)(features) - output = TimeDistributed(Dense(self.rep.label_table_size, activation='softmax'), name='softmax')(features) - self.model = Model(inputs=[x1_in,x2_in,x3_in], outputs=output, name="hugface_softmax") - - # lr_schedule=LRSchedule_LINEAR( - # init_lr=1e-5, - # init_warmup_lr=1e-7, - # final_lr=1e-6, - # warmup_steps=0, - # decay_steps=40000) - - opt = Adam(learning_rate = 5e-6) - self.model.compile( - optimizer=opt, - loss='sparse_categorical_crossentropy', - metrics=['accuracy'], - ) - self.model.summary() - - - def load_model(self,model_file): - self.model.load_weights(model_file) - self.model.summary() - print('load HUGFACE model done!') +# -*- coding: utf-8 -*- +""" +Created on Wed Feb 10 09:08:09 2021 + +@author: luol2 + +Model Architecture + +""" +import tensorflow as tf +from src_python.SpeAss.represent_sa import Hugface_RepresentationLayer +from tensorflow.keras.layers import * +from tensorflow.keras.models import Model +from tensorflow.keras.optimizers import RMSprop, SGD, Adam, Adadelta, Adagrad,Nadam +from transformers import TFAutoModel +import numpy as np +import sys + + +class LRSchedule_LINEAR(tf.keras.optimizers.schedules.LearningRateSchedule): + def __init__( + self, + init_lr=5e-5, + init_warmup_lr=0.0, + final_lr=5e-7, + warmup_steps=0, + decay_steps=0, + ): + super().__init__() + self.init_lr = init_lr + self.init_warmup_lr=init_warmup_lr + self.final_lr = final_lr + self.warmup_steps = warmup_steps + self.decay_steps = decay_steps + + def __call__(self, step): + """ linear warm up - linear decay """ + if self.warmup_steps>0: + warmup_lr = (self.init_lr - self.init_warmup_lr)/self.warmup_steps * step+self.init_warmup_lr + else: + warmup_lr=1000.0 + #print('\n.......warmup_lr:',warmup_lr) + decay_lr = tf.math.maximum( + self.final_lr, + self.init_lr - (step - self.warmup_steps)/self.decay_steps*(self.init_lr - self.final_lr) + ) + #print('\n.....decay_lr:',decay_lr) + return tf.math.minimum(warmup_lr,decay_lr) + + + +class HUGFACE_NER(): #huggingface transformers + def __init__(self, model_files): + self.model_type='HUGFACE' + self.maxlen = 512 + self.checkpoint_path = model_files['checkpoint_path'] + self.label_file=model_files['labelfile'] + self.lowercase=model_files['lowercase'] + self.rep = Hugface_RepresentationLayer(self.checkpoint_path, self.label_file, lowercase=self.lowercase) + + + def build_encoder(self): + print('...vocab len:',self.rep.vocab_len) + plm_model = TFAutoModel.from_pretrained(self.checkpoint_path, from_pt=True) + plm_model.resize_token_embeddings(self.rep.vocab_len) + x1_in = Input(shape=(self.maxlen,),dtype=tf.int32, name='input_ids') + x2_in = Input(shape=(self.maxlen,),dtype=tf.int32, name='token_type_ids') + x3_in = Input(shape=(self.maxlen,),dtype=tf.int32, name='attention_mask') + x = plm_model(x1_in, token_type_ids=x2_in, attention_mask=x3_in)[0] + #dense = TimeDistributed(Dense(512, activation='relu'), name='dense1')(x) + self.encoder = Model (inputs=[x1_in,x2_in,x3_in], outputs=x,name='hugface_encoder') + self.encoder.summary() + + def build_softmax_decoder(self): + + x1_in = Input(shape=(self.maxlen,),dtype=tf.int32) + x2_in = Input(shape=(self.maxlen,),dtype=tf.int32) + x3_in = Input(shape=(self.maxlen,),dtype=tf.int32) + features = self.encoder([x1_in,x2_in,x3_in]) + #features = Dropout(0.4)(features) + #features = TimeDistributed(Dense(128, activation='relu'), name='dense2')(features) + features= Dropout(0.1)(features) + output = TimeDistributed(Dense(self.rep.label_table_size, activation='softmax'), name='softmax')(features) + self.model = Model(inputs=[x1_in,x2_in,x3_in], outputs=output, name="hugface_softmax") + + # lr_schedule=LRSchedule_LINEAR( + # init_lr=1e-5, + # init_warmup_lr=1e-7, + # final_lr=1e-6, + # warmup_steps=0, + # decay_steps=40000) + + opt = Adam(learning_rate = 5e-6) + self.model.compile( + optimizer=opt, + loss='sparse_categorical_crossentropy', + metrics=['accuracy'], + ) + self.model.summary() + + + def load_model(self,model_file): + self.model.load_weights(model_file) + self.model.summary() + print('load HUGFACE model done!') diff --git a/src_python/SpeAss/processing_data_sa.py b/src_python/SpeAss/processing_data_sa.py index a80ae827b9486053029f0dcafe3c32993ce4542e..31b2ccec296e96d83ebe09dbc20d6bd874bc7187 100644 --- a/src_python/SpeAss/processing_data_sa.py +++ b/src_python/SpeAss/processing_data_sa.py @@ -1,201 +1,201 @@ -# -*- coding: utf-8 -*- -""" -Created on Tue Mar 10 16:34:12 2020 - -@author: luol2 -""" -import numpy as np -import io -import sys -#read ner text (word\tlabel), generate the list[[[w1,label],[w2,label]]] -def ml_intext(file): - fin=open(file,'r',encoding='utf-8') - alltexts=fin.read().strip().split('\n\n') - fin.close() - data_list=[] - label_list=[] - - for sents in alltexts: - lines=sents.split('\n') - temp_sentece=[] - for i in range(0,len(lines)): - seg=lines[i].split('\t') - temp_sentece.append(seg[:]) - label_list.append(seg[-1]) - - data_list.append(temp_sentece) - #print(data_list) - #print(label_list) - return data_list,label_list - -def ml_intext_fn(alltexts): - # fin=io.StringIO(ml_input) - # alltexts=fin.read().strip().split('\n\n') - # fin.close() - data_list=[] - label_list=[] - - for sents in alltexts: - lines=sents.split('\n') - temp_sentece=[] - for i in range(0,len(lines)): - seg=lines[i].split('\t') - temp_sentece.append(seg[:]) - label_list.append(seg[-1]) - - data_list.append(temp_sentece) - #print(data_list) - #print(label_list) - return data_list,label_list - -# model predict result to conll evalute format [token answer predict] -def out_BIO(file,raw_pre,raw_input,label_set): - fout=open(file,'w',encoding='utf-8') - for i in range(len(raw_input)): - - for j in range(len(raw_input[i])): - if jmax_len: - max_len=word_len - print(seg[0]) - for i in range(word_len): - if seg[0][i] not in char_vocab: - char_vocab.append(seg[0][i]) - #else: - # fout.write(line) - fin.close() - #fout.close() - for ele in char_vocab: - fout_char.write(ele+'\n') - fout_char.close() - print('max_len:',max_len) - - -if __name__=='__main__': - # infile='//panfs/pan1/bionlp/lulab/luoling/HPO_project/AutoPhe/data/pubmed_unlabel/mutation_disease_1990.ner_BIO' - # #outfile='//panfs/pan1/bionlp/lulab/luoling/HPO_project/AutoPhe/data/pubmed_unlabel/mutation_disease_1990.ner_BIO_new' - # outfile_char='//panfs/pan1/bionlp/lulab/luoling/HPO_project/AutoPhe/src/nn_model/vocab/char_vocab' - # #processing_text(file) - # char_vocab(infile,outfile_char) - a=[1,2,3] - print(a[:-1]) +# -*- coding: utf-8 -*- +""" +Created on Tue Mar 10 16:34:12 2020 + +@author: luol2 +""" +import numpy as np +import io +import sys +#read ner text (word\tlabel), generate the list[[[w1,label],[w2,label]]] +def ml_intext(file): + fin=open(file,'r',encoding='utf-8') + alltexts=fin.read().strip().split('\n\n') + fin.close() + data_list=[] + label_list=[] + + for sents in alltexts: + lines=sents.split('\n') + temp_sentece=[] + for i in range(0,len(lines)): + seg=lines[i].split('\t') + temp_sentece.append(seg[:]) + label_list.append(seg[-1]) + + data_list.append(temp_sentece) + #print(data_list) + #print(label_list) + return data_list,label_list + +def ml_intext_fn(alltexts): + # fin=io.StringIO(ml_input) + # alltexts=fin.read().strip().split('\n\n') + # fin.close() + data_list=[] + label_list=[] + + for sents in alltexts: + lines=sents.split('\n') + temp_sentece=[] + for i in range(0,len(lines)): + seg=lines[i].split('\t') + temp_sentece.append(seg[:]) + label_list.append(seg[-1]) + + data_list.append(temp_sentece) + #print(data_list) + #print(label_list) + return data_list,label_list + +# model predict result to conll evalute format [token answer predict] +def out_BIO(file,raw_pre,raw_input,label_set): + fout=open(file,'w',encoding='utf-8') + for i in range(len(raw_input)): + + for j in range(len(raw_input[i])): + if jmax_len: + max_len=word_len + print(seg[0]) + for i in range(word_len): + if seg[0][i] not in char_vocab: + char_vocab.append(seg[0][i]) + #else: + # fout.write(line) + fin.close() + #fout.close() + for ele in char_vocab: + fout_char.write(ele+'\n') + fout_char.close() + print('max_len:',max_len) + + +if __name__=='__main__': + # infile='//panfs/pan1/bionlp/lulab/luoling/HPO_project/AutoPhe/data/pubmed_unlabel/mutation_disease_1990.ner_BIO' + # #outfile='//panfs/pan1/bionlp/lulab/luoling/HPO_project/AutoPhe/data/pubmed_unlabel/mutation_disease_1990.ner_BIO_new' + # outfile_char='//panfs/pan1/bionlp/lulab/luoling/HPO_project/AutoPhe/src/nn_model/vocab/char_vocab' + # #processing_text(file) + # char_vocab(infile,outfile_char) + a=[1,2,3] + print(a[:-1]) diff --git a/src_python/SpeAss/represent_sa.py b/src_python/SpeAss/represent_sa.py index 608367babee7437c4f9111012c6a694402db4df5..fb7423acef89a8c7a013c8ec074c1a7dd7fe9b90 100644 --- a/src_python/SpeAss/represent_sa.py +++ b/src_python/SpeAss/represent_sa.py @@ -1,143 +1,143 @@ -# -*- coding: utf-8 -*- -""" -Created on Mon Aug 30 19:54:17 2021 - -@author: luol2 - -input representation of model - -""" - -import os, sys -import numpy as np -from tensorflow.keras.preprocessing.sequence import pad_sequences -from transformers import AutoTokenizer - - - -class Hugface_RepresentationLayer(object): - - - def __init__(self, tokenizer_name_or_path, label_file,lowercase=True): - - - #load vocab - - self.model_type='bert' - self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, use_fast=True,do_lower_case=lowercase) - - self.tokenizer.add_tokens(["arg1s","arg1e","gene1s","gene1e","species1s","species1e"]) - - #load label - self.label_2_index={} - self.index_2_label={} - self.label_table_size=0 - self.load_label_vocab(label_file,self.label_2_index,self.index_2_label) - self.label_table_size=len(self.label_2_index) - self.vocab_len=len(self.tokenizer) - - def load_label_vocab(self,fea_file,fea_index,index_2_label): - - fin=open(fea_file,'r',encoding='utf-8') - all_text=fin.read().strip().split('\n') - fin.close() - for i in range(0,len(all_text)): - fea_index[all_text[i]]=i - index_2_label[str(i)]=all_text[i] - - - - def generate_label_list(self,bert_tokens,labels,word_index): - label_list=['O']*len(word_index) - label_i=0 - if len(word_index)!=len(bert_tokens): - print('index != tokens',word_index,bert_tokens) - sys.exit() - last_word_index=0 - for i in range(0,len(word_index)): - if word_index[i]==None: - pass - else: - label_list[i]=labels[word_index[i]] - - label_list_index=[] - bert_text_label=[] - for i in range(0,len(bert_tokens)): - label_list_index.append(self.label_2_index[label_list[i]]) - bert_text_label.append([bert_tokens[i],label_list[i]]) - - return label_list_index,bert_text_label - - - def load_data_hugface(self,instances, labels, word_max_len=100, label_type='softmax'): - - x_index=[] - x_seg=[] - x_mask=[] - y_list=[] - bert_text_labels=[] - max_len=0 - over_num=0 - maxT=word_max_len - ave_len=0 - - - for sentence in instances: - sentence_text_list=[] - label_list=[] - for j in range(0,len(sentence)): - sentence_text_list.append(sentence[j][0]) - label_list.append(sentence[j][-1]) - - token_result=self.tokenizer( - sentence_text_list, - max_length=word_max_len, - truncation=True,is_split_into_words=True) - - bert_tokens=self.tokenizer.convert_ids_to_tokens(token_result['input_ids']) - word_index=token_result.word_ids(batch_index=0) - ave_len+=len(bert_tokens) - if len(sentence_text_list)>max_len: - max_len=len(sentence_text_list) - if len(bert_tokens)>=maxT: - over_num+=1 - - x_index.append(token_result['input_ids']) - if self.model_type in {"gpt2", "roberta"}: - x_seg.append([0]*len(token_result['input_ids'])) - else: - x_seg.append(token_result['token_type_ids']) - x_mask.append(token_result['attention_mask']) - - #print('label:',label_list) - label_list,bert_text_label=self.generate_label_list(bert_tokens,label_list,word_index) - #print('\nlabel list:',label_list) - #print('\nbert_text_label:',bert_text_label) - #sys.exit() - y_list.append(label_list) - #print(y_list) - bert_text_labels.append(bert_text_label) - - - x1_np = pad_sequences(x_index, word_max_len, value=0, padding='post',truncating='post') # right padding - x2_np = pad_sequences(x_seg, word_max_len, value=0, padding='post',truncating='post') - x3_np = pad_sequences(x_mask, word_max_len, value=0, padding='post',truncating='post') - y_np = pad_sequences(y_list, word_max_len, value=0, padding='post',truncating='post') - - # print('bert max len:',max_len,',Over',maxT,':',over_num,'ave len:',ave_len/len(instances),'total:',len(instances)) - if label_type=='onehot': - y_np = np.eye(len(labels), dtype='float32')[y_np] - elif label_type=='softmax': - y_np = np.expand_dims(y_np, 2) - elif label_type=='crf': - pass - - - return [x1_np, x2_np,x3_np], y_np,bert_text_labels - - -if __name__ == '__main__': - pass - - - +# -*- coding: utf-8 -*- +""" +Created on Mon Aug 30 19:54:17 2021 + +@author: luol2 + +input representation of model + +""" + +import os, sys +import numpy as np +from tensorflow.keras.preprocessing.sequence import pad_sequences +from transformers import AutoTokenizer + + + +class Hugface_RepresentationLayer(object): + + + def __init__(self, tokenizer_name_or_path, label_file,lowercase=True): + + + #load vocab + + self.model_type='bert' + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, use_fast=True,do_lower_case=lowercase) + + self.tokenizer.add_tokens(["arg1s","arg1e","gene1s","gene1e","species1s","species1e"]) + + #load label + self.label_2_index={} + self.index_2_label={} + self.label_table_size=0 + self.load_label_vocab(label_file,self.label_2_index,self.index_2_label) + self.label_table_size=len(self.label_2_index) + self.vocab_len=len(self.tokenizer) + + def load_label_vocab(self,fea_file,fea_index,index_2_label): + + fin=open(fea_file,'r',encoding='utf-8') + all_text=fin.read().strip().split('\n') + fin.close() + for i in range(0,len(all_text)): + fea_index[all_text[i]]=i + index_2_label[str(i)]=all_text[i] + + + + def generate_label_list(self,bert_tokens,labels,word_index): + label_list=['O']*len(word_index) + label_i=0 + if len(word_index)!=len(bert_tokens): + print('index != tokens',word_index,bert_tokens) + sys.exit() + last_word_index=0 + for i in range(0,len(word_index)): + if word_index[i]==None: + pass + else: + label_list[i]=labels[word_index[i]] + + label_list_index=[] + bert_text_label=[] + for i in range(0,len(bert_tokens)): + label_list_index.append(self.label_2_index[label_list[i]]) + bert_text_label.append([bert_tokens[i],label_list[i]]) + + return label_list_index,bert_text_label + + + def load_data_hugface(self,instances, labels, word_max_len=100, label_type='softmax'): + + x_index=[] + x_seg=[] + x_mask=[] + y_list=[] + bert_text_labels=[] + max_len=0 + over_num=0 + maxT=word_max_len + ave_len=0 + + + for sentence in instances: + sentence_text_list=[] + label_list=[] + for j in range(0,len(sentence)): + sentence_text_list.append(sentence[j][0]) + label_list.append(sentence[j][-1]) + + token_result=self.tokenizer( + sentence_text_list, + max_length=word_max_len, + truncation=True,is_split_into_words=True) + + bert_tokens=self.tokenizer.convert_ids_to_tokens(token_result['input_ids']) + word_index=token_result.word_ids(batch_index=0) + ave_len+=len(bert_tokens) + if len(sentence_text_list)>max_len: + max_len=len(sentence_text_list) + if len(bert_tokens)>=maxT: + over_num+=1 + + x_index.append(token_result['input_ids']) + if self.model_type in {"gpt2", "roberta"}: + x_seg.append([0]*len(token_result['input_ids'])) + else: + x_seg.append(token_result['token_type_ids']) + x_mask.append(token_result['attention_mask']) + + #print('label:',label_list) + label_list,bert_text_label=self.generate_label_list(bert_tokens,label_list,word_index) + #print('\nlabel list:',label_list) + #print('\nbert_text_label:',bert_text_label) + #sys.exit() + y_list.append(label_list) + #print(y_list) + bert_text_labels.append(bert_text_label) + + + x1_np = pad_sequences(x_index, word_max_len, value=0, padding='post',truncating='post') # right padding + x2_np = pad_sequences(x_seg, word_max_len, value=0, padding='post',truncating='post') + x3_np = pad_sequences(x_mask, word_max_len, value=0, padding='post',truncating='post') + y_np = pad_sequences(y_list, word_max_len, value=0, padding='post',truncating='post') + + # print('bert max len:',max_len,',Over',maxT,':',over_num,'ave len:',ave_len/len(instances),'total:',len(instances)) + if label_type=='onehot': + y_np = np.eye(len(labels), dtype='float32')[y_np] + elif label_type=='softmax': + y_np = np.expand_dims(y_np, 2) + elif label_type=='crf': + pass + + + return [x1_np, x2_np,x3_np], y_np,bert_text_labels + + +if __name__ == '__main__': + pass + + + diff --git a/src_python/SpeAss/sa_tag.py b/src_python/SpeAss/sa_tag.py index d680e2daad7bb8b7a8d3854578a85ce73b10e2f9..35be7fa904d7ebb23027eb2c50dc1d68da46bb55 100644 --- a/src_python/SpeAss/sa_tag.py +++ b/src_python/SpeAss/sa_tag.py @@ -1,586 +1,586 @@ -# -*- coding: utf-8 -*- -""" -Created on Wed Jun 8 11:52:42 2022 - -@author: luol2 -""" - -import io -import os -import argparse -import stanza -import sys -import re -import bioc -from src_python.SpeAss.ml_tagging_score_sa import NER_Tag - - -def ssplit_token(infile,nlp_token): - fin=io.StringIO(infile.getvalue()) - fout=io.StringIO() - # fout=open(outfile,'w',encoding='utf-8') - all_in=fin.read().strip().split('\n\n') - fin.close() - ori_text_newentity={} #{line[0]+line[1]:[all entity]} - entity_type=set() - token_text_new={}#{pmid:token_text} - for doc_text in all_in: - lines=doc_text.split('\n') - ori_text=lines[0].split('|t|')[1]+' '+lines[1].split('|a|')[1] - pmid=lines[0].split('|t|')[0] - # print(pmid) - entity_all=[] #[[seg0,seg1,...,],[]] - entity_all_ori=[] - entity_num=0 - - #first sort - doc_result={} - for i in range(2,len(lines)): - segs=lines[i].split('\t') - doc_result[lines[i]]=[int(segs[2]),int(segs[3])] - doc_result=sorted(doc_result.items(), key=lambda kv:(kv[1]), reverse=False) - doc_result_sort=[] - for ele in doc_result: - doc_result_sort.append(ele[0]) - - for i in range(0,len(doc_result_sort)): - seg=doc_result_sort[i].strip().split('\t') - entity_type.add(seg[5]) - # print(seg) - if len(seg)<=6:#Gene - entity_all_ori.append([seg[0],seg[1],'M'+str(entity_num),seg[2],seg[3],seg[4],seg[5],'-']) - entity_all.append([seg[0],seg[1],'M'+str(entity_num),seg[2],seg[3],seg[4],'Gene','-']) - entity_num+=1 - elif seg[-1].find('*')>=0:# *Species - entity_all_ori.append([seg[0],seg[1],'M'+str(entity_num),seg[2],seg[3],seg[4],seg[5],seg[6]]) - entity_all.append([seg[0],seg[1],'M'+str(entity_num),seg[2],seg[3],seg[4],'Species',seg[6]]) - entity_num+=1 - ori_text_newentity[lines[0]+'\n'+lines[1]]=entity_all_ori - # sys.exit() - - #ssplit token - doc_stanza = nlp_token(ori_text) - token_text='' - sentence_index=[] #[text_offset] - for sent in doc_stanza.sentences: - for word in sent.words: - if word.text.strip()=='': - # print('token is blank!') - pass - token_text+=word.text+' ' - token_text=token_text+'' #sentence split - sentence_index.append(len(token_text)) - - #ori_index map token_index - index_map=[-1]*len(ori_text) - j=0 - space_list=[' ',chr(160),chr(8201),chr(8194),chr(8197),chr(8202)] #空格有好几种,第一个是常用32,第二个shi 160,8201,8194,8197 - for i in range(0,len(ori_text)): - if ori_text[i] in space_list: - pass - elif ori_text[i]==token_text[j]: - index_map[i]=j - j+=1 - else: - j+=1 - temp_log=j - try: - while(ori_text[i]!=token_text[j]): - j+=1 - except: - print('doc',doc_text) - print('token_text:',token_text) - print('error:',ori_text[i-10:i+10],'i:',ori_text[i],'j:',token_text[temp_log],',',token_text[temp_log-10:temp_log+10]) - print(ord(ori_text[i]),ord(' ')) - sys.exit() - index_map[i]=j - j+=1 - # token_text=token_text.replace(' ','') - # print(token_text) - fout.write(token_text+'\n') - token_text_new[pmid]=token_text - entity_i=0 - cur_sent_i=0 - new_ente=0 - cur_sents=0 - cur_sente=sentence_index[0] - if entity_all!=[]: - bug_new_entity=[] - for entity_i in range(0,len(entity_all)): - new_ents=index_map[int(entity_all[entity_i][3])] - new_ente=index_map[int(entity_all[entity_i][4])-1]+1 - new_ent=token_text[new_ents:new_ente] - old_ent=entity_all[entity_i][5] - cur_sent_i=0 - cur_sents=0 - cur_sente=sentence_index[0] - while (not (max(new_ents,cur_sents) <= min(new_ente,cur_sente))) and (cur_sent_i=cur_sents and new_ente< cur_sente: - - if new_ent.replace(' ','') !=old_ent.replace(' ',''): - # print('entity error:',pmid,old_ent,new_ent,entity_all[entity_i][2],entity_all[entity_i][3]) - pass - fout.write(entity_all[entity_i][0]+'\t'+entity_all[entity_i][1]+'\t'+entity_all[entity_i][2]+'-'+str(cur_sent_i)+'\t'+str(new_ents)+'\t'+str(new_ente)+'\t'+new_ent+'\t'+entity_all[entity_i][6]+'\t'+entity_all[entity_i][7]+'\n') - entity_i+=1 - if entity_i>=len(entity_all): - break - new_ents=index_map[int(entity_all[entity_i][3])] - new_ente=index_map[int(entity_all[entity_i][4])-1]+1 - new_ent=token_text[new_ents:new_ente] - old_ent=entity_all[entity_i][5] - cur_sent_i+=1 - if cur_sent_i >= len(sentence_index): - break - cur_sents=sentence_index[cur_sent_i-1] - cur_sente=sentence_index[cur_sent_i] - """ - fout.write('\n') - # print(entity_type) - # fout.close() - return ori_text_newentity,token_text_new,fout - -def filter_nest(infile): #nonest - - # fin=open(infile,'r',encoding='utf-8') - # fout=open(outfile,'w',encoding='utf-8') - fin=io.StringIO(infile.getvalue()) - fout=io.StringIO() - - documents=fin.read().strip().split('\n\n') - fin.close() - total_entity=0 - over_entity=0 - nest_entity=0 - for doc in documents: - lines=doc.split('\n') - context=lines[0] - entity_list=[] - if len(lines)>1: - first_entity=lines[1].split('\t') - nest_list=[first_entity] - max_eid=int(first_entity[4]) - total_entity+=len(lines)-2 - for i in range(2,len(lines)): - segs=lines[i].split('\t') - if int(segs[3])> max_eid: - if len(nest_list)==1: - entity_list.append(nest_list[0]) - nest_list=[] - nest_list.append(segs) - if int(segs[4])>max_eid: - max_eid=int(segs[4]) - else: - # print(nest_list) - nest_entity+=len(nest_list)-1 - tem=find_max_entity(nest_list)#find max entity - # if len(tem)>1: - # print('max nest >1:',tem) - entity_list.extend(tem) - nest_list=[] - nest_list.append(segs) - if int(segs[4])>max_eid: - max_eid=int(segs[4]) - - else: - nest_list.append(segs) - if int(segs[4])>max_eid: - max_eid=int(segs[4]) - if nest_list!=[]: - if len(nest_list)==1: - entity_list.append(nest_list[0]) - - else: - tem=find_max_entity(nest_list)#find max entity - # if len(tem)>1: - # print('max nest >1:',tem) - entity_list.extend(tem) - fout.write(context+'\n') - for ele in entity_list: - fout.write('\t'.join(ele)+'\n') - fout.write('\n') - # print(total_entity,over_entity, nest_entity) - return fout -def find_max_entity(nest_list): - max_len=0 - final_tem=[] - max_index=0 - for i in range(0, len(nest_list)): - cur_len=int(nest_list[i][4])-int(nest_list[i][3]) - if cur_len>max_len: - max_len=cur_len - max_index=i - elif cur_len==max_len: - if nest_list[i][6] =='Gene': - max_index=i - # elif nest_list[i][5] =='Species': - # final_tem.append(nest_list[i]) - - final_tem.append(nest_list[max_index]) - return final_tem - - -# machine learning species assignment -def ml_tag(infile,nn_model): - - #tagging text - fin=io.StringIO(infile.getvalue()) - fout=io.StringIO() - # fin=open(infile,'r',encoding='utf-8') - all_in=fin.read().strip().split('\n\n') - fin.close() - - for doc in all_in: - pre_result,entity_all=NER_Tag(doc, nn_model) - for ele in entity_all: - ent_id=ele[3]+'-'+ele[4] - if ent_id in pre_result.keys(): - fout.write('\t'.join(ele)+'\t'+','.join(pre_result[ent_id])+'\n') - else: - fout.write('\t'.join(ele)+'\t-\n') - fout.write('\n') - - return fout - - -# details nearest+and -def post_rule1(ori_context,token_text,infile,outfile): - fin=open(infile,'r',encoding='utf-8') - fout=open(outfile,'w',encoding='utf-8') - pred_results={} #{pmid:{'M0':{'sent':'','offset':[sid,eid],'score':[[id,score],[id,score]]}}} #gene - species_index={} #{pmid:{sentid:[[spe_seg1],[spe_seg]]}} - mem_sent={} #{pmid:{'M0':sentid}} - gene_num=0 - gene_none=0 - for line in fin: - seg=line.strip().split('\t') - if len(seg)>1: - if seg[0] not in mem_sent.keys(): - _term_seg=seg[1].split('-') - mem_sent[seg[0]]={_term_seg[0]:_term_seg[1]} - else: - _term_seg=seg[1].split('-') - mem_sent[seg[0]][_term_seg[0]]=_term_seg[1] - if seg[5]=='Species': - if seg[0] not in species_index.keys(): - _sent_id=seg[1].split('-')[1] - species_index[seg[0]]={_sent_id:[seg]} - else: - _sent_id=seg[1].split('-')[1] - if _sent_id in species_index[seg[0]].keys(): - species_index[seg[0]][_sent_id].append(seg) - else: - species_index[seg[0]][_sent_id]=[seg] - else: - _pred_ids=seg[-1].split(',') - _temp_id_score=[] #[[spe_id,score]] - _sent_id=seg[1].split('-')[1] - for _pred_id in _pred_ids: - _temp_id_score.append(_pred_id.split('|')) - if seg[0] not in pred_results.keys(): - pred_results[seg[0]]={seg[1].split('-')[0]:{'sent':_sent_id,'offset':[seg[2],seg[3]],'score':_temp_id_score}} - else: - pred_results[seg[0]][seg[1].split('-')[0]]={'sent':_sent_id,'offset':[seg[2],seg[3]],'score':_temp_id_score} - #print(pred_results) - for pmid_text in ori_context.keys(): - #print(pmid_text) - lines=pmid_text.split('\n') - ori_text=lines[0].split('|t|')[1]+' '+lines[1].split('|a|')[1] - # print(ori_text) - fout.write(pmid_text+'\n') - pmid=lines[0].split('|t|')[0] - before_species=[] #nearest [eid,spe_id] - after_species=[] #nearest [sid,spe_id] - doc_specs=species_index[pmid] - #mul and spe - mul_and_spe=[] - for spe_sent in doc_specs.keys(): - last_id='' - new_diff_spe=[] - _temp_speid=set() - for ele in doc_specs[spe_sent]: - if ele[-2] !=last_id: - new_diff_spe.append(ele) - last_id =ele[-2] - _temp_speid.add(ele[-2]) - else: - - new_diff_spe.pop() - new_diff_spe.append(ele) - _temp_speid.add(ele[-2]) - last_id =ele[-2] - if len(new_diff_spe)==2: - spe_and_text=new_diff_spe[0][4]+' and '+new_diff_spe[1][4] - if ori_text.find(spe_and_text)>=0: - # print('old:',doc_specs[spe_sent]) - # print('new:',new_diff_spe) - # print('\n') - mul_and_spe=list(_temp_speid) - # print(mul_and_spe) - elif len(new_diff_spe)>2: - spe_and_text='' - for i in range(0,len(new_diff_spe)-1): - spe_and_text+=new_diff_spe[i][4]+', ' - spe_and_text1=spe_and_text[0:-2]+' and '+new_diff_spe[-1][4] - spe_and_text2=spe_and_text+'and '+new_diff_spe[-1][4] - if ori_text.find(spe_and_text1)>=0 or ori_text.find(spe_and_text2)>=0: - # print('old:',doc_specs[spe_sent]) - # print('new:',new_diff_spe) - mul_and_spe=list(_temp_speid) - # print(mul_and_spe) - Gene_type_list=['Gene','FamilyName','DomainMotif'] - for i,ele in enumerate(ori_context[pmid_text]): - #print(ele) - - if ele[5] in Gene_type_list: - gene_num+=1 - final_preds=set() - if ele[1] in pred_results[ele[0]].keys(): - temp_preds=pred_results[ele[0]][ele[1]]['score'] - - if temp_preds!=[['-']]: - if len(temp_preds)==1: - final_preds.add(temp_preds[0][0]) - else: - max_id='' - max_score=0 - for _temp_pred in temp_preds: - _score=float(_temp_pred[1]) - _id_ass=_temp_pred[0] - if len(mul_and_spe)>1: - if _score>0.5 and (_id_ass in mul_and_spe): - # print(_score) - final_preds.add(_id_ass) - if _score>max_score: - max_id=_id_ass - max_score=_score - if len(final_preds)==0: - final_preds.add(max_id) - # final_preds.add(multi_id) - else: #'-' nearst rule - gene_none+=1 - # print(mem_sent[ele[0]]) - _sent_id_gene=mem_sent[ele[0]][ele[1]] - - for j in range(i+1,len(ori_context[pmid_text])): - temp_seg=ori_context[pmid_text][j] - if temp_seg[5]=='Species': - after_species=[int(temp_seg[2]),temp_seg[6]] - break - # print(before_species,after_species) - # print(seg) - if before_species!=[] and after_species!=[]: - if len(ori_text[before_species[0]:int(ele[2])].split()) > len(ori_text[int(ele[3]):after_species[0]].split()): - final_preds.add(after_species[1]) - else: - final_preds.add(before_species[1]) - elif before_species==[]: - final_preds.add(after_species[1]) - elif after_species==[]: - final_preds.add(before_species[1]) - if len(final_preds)==0: - print('none pred!!!') - fout.write(ele[0]+'\t'+'\t'.join(ele[2:])+'\t'+','.join(final_preds)+'\n') - else: - # gene_none+=1 - # print(ele) - for j in range(i+1,len(ori_context[pmid_text])): - temp_seg=ori_context[pmid_text][j] - if temp_seg[5]=='Species': - after_species=[int(temp_seg[2]),temp_seg[6]] - break - # print(before_species,after_species) - # print(seg) - if before_species!=[] and after_species!=[]: - if len(ori_text[before_species[0]:int(ele[2])].split()) > len(ori_text[int(ele[3]):after_species[0]].split()): - final_preds.add(after_species[1]) - else: - final_preds.add(before_species[1]) - elif before_species==[]: - final_preds.add(after_species[1]) - elif after_species==[]: - final_preds.add(before_species[1]) - fout.write(ele[0]+'\t'+'\t'.join(ele[2:])+'\t'+','.join(final_preds)+'\n') - else: - fout.write(ele[0]+'\t'+'\t'.join(ele[2:])+'\t-\n') - before_species=[int(ele[3]),ele[6]] - fout.write('\n') - print('gene, none:',gene_num,gene_none) - fout.close() - -# major+and -def post_rule2(ori_context,token_text,infile): - # fin=open(infile,'r',encoding='utf-8') - # fout=open(outfile,'w',encoding='utf-8') - fin=io.StringIO(infile.getvalue()) - fout=io.StringIO() - pred_results={} #{pmid:{'M0':{'sent':'','offset':[sid,eid],'score':[[id,score],[id,score]]}}} #gene - species_index={} #{pmid:{sentid:[[spe_seg1],[spe_seg]]}} - species_count={}#{pmid:{speid:num}} - gene_num=0 - gene_none=0 - for line in fin: - seg=line.strip().split('\t') - if len(seg)>1: - if seg[6]=='Species': - if seg[0] not in species_count.keys(): - species_count[seg[0]]={seg[-2]:1} - else: - if seg[-2] not in species_count[seg[0]].keys(): - species_count[seg[0]][seg[-2]]=1 - else: - species_count[seg[0]][seg[-2]]+=1 - - if seg[0] not in species_index.keys(): - _sent_id=seg[2].split('-')[1] - species_index[seg[0]]={_sent_id:[seg]} - else: - _sent_id=seg[2].split('-')[1] - if _sent_id in species_index[seg[0]].keys(): - species_index[seg[0]][_sent_id].append(seg) - else: - species_index[seg[0]][_sent_id]=[seg] - else: - _pred_ids=seg[-1].split(',') - _temp_id_score=[] #[[spe_id,score]] - _sent_id=seg[2].split('-')[1] - for _pred_id in _pred_ids: - _temp_id_score.append(_pred_id.split('|')) - if seg[0] not in pred_results.keys(): - pred_results[seg[0]]={seg[2].split('-')[0]:{'sent':_sent_id,'offset':[seg[3],seg[4]],'score':_temp_id_score}} - else: - pred_results[seg[0]][seg[2].split('-')[0]]={'sent':_sent_id,'offset':[seg[3],seg[4]],'score':_temp_id_score} - fin.close() - #print(pred_results) - for pmid_text in ori_context.keys(): - #print(pmid_text) - lines=pmid_text.split('\n') - ori_text=lines[0].split('|t|')[1]+' '+lines[1].split('|a|')[1] - # print(ori_text) - fout.write(pmid_text+'\n') - pmid=lines[0].split('|t|')[0] - if pmid in species_count.keys(): - marjor_species = max(zip(species_count[pmid].values(), species_count[pmid].keys())) - else: - marjor_species = (1000,'*9606') - - if pmid in species_index.keys(): - doc_specs=species_index[pmid] - #mul and spe - mul_and_spe=[] - for spe_sent in doc_specs.keys(): - last_id='' - new_diff_spe=[] - _temp_speid=set() - for ele in doc_specs[spe_sent]: - if ele[-2] !=last_id: - new_diff_spe.append(ele) - last_id =ele[-2] - _temp_speid.add(ele[-2]) - else: - - new_diff_spe.pop() - new_diff_spe.append(ele) - _temp_speid.add(ele[-2]) - last_id =ele[-2] - if len(new_diff_spe)==2: - spe_and_text=new_diff_spe[0][5]+' and '+new_diff_spe[1][5] - if ori_text.find(spe_and_text)>=0: - # print('old:',doc_specs[spe_sent]) - # print('new:',new_diff_spe) - # print('\n') - mul_and_spe=list(_temp_speid) - # print(mul_and_spe) - elif len(new_diff_spe)>2: - spe_and_text='' - for i in range(0,len(new_diff_spe)-1): - spe_and_text+=new_diff_spe[i][5]+', ' - spe_and_text1=spe_and_text[0:-2]+' and '+new_diff_spe[-1][5] - spe_and_text2=spe_and_text+'and '+new_diff_spe[-1][5] - if ori_text.find(spe_and_text1)>=0 or ori_text.find(spe_and_text2)>=0: - # print('old:',doc_specs[spe_sent]) - # print('new:',new_diff_spe) - mul_and_spe=list(_temp_speid) - # print(mul_and_spe) - else: - mul_and_spe=[] - - Gene_type_list=['Gene','FamilyName','DomainMotif'] - for i,ele in enumerate(ori_context[pmid_text]): - #print(ele) - - if ele[6] in Gene_type_list: - gene_num+=1 - final_preds=set() - if (ele[0] in pred_results.keys()) and (ele[2] in pred_results[ele[0]].keys()): - temp_preds=pred_results[ele[0]][ele[2]]['score'] - - if temp_preds!=[['-']]: - if len(temp_preds)==1: - final_preds.add(temp_preds[0][0]) - else: - max_id='' - max_score=0 - for _temp_pred in temp_preds: - _score=float(_temp_pred[1]) - _id_ass=_temp_pred[0] - if len(mul_and_spe)>1: - if _score>0.5 and (_id_ass in mul_and_spe): - # print(_score) - final_preds.add(_id_ass) - if _score>max_score: - max_id=_id_ass - max_score=_score - if len(final_preds)==0: - final_preds.add(max_id) - # final_preds.add(multi_id) - else: #'-' major species - gene_none+=1 - # print(mem_sent[ele[0]]) - final_preds.add(marjor_species[1]) - if len(final_preds)==0: - print('none pred!!!') - fout.write(ele[0]+'\t'+ele[1]+'\t'+'\t'.join(ele[3:-1])+'\t'+','.join(final_preds).replace('*','')+'\n') - else: - final_preds.add(marjor_species[1]) - fout.write(ele[0]+'\t'+ele[1]+'\t'+'\t'.join(ele[3:-1])+'\t'+','.join(final_preds).replace('*','')+'\n') - else: - fout.write(ele[0]+'\t'+ele[1]+'\t'+'\t'.join(ele[3:])+'\n') - fout.write('\n') - # print('gene, none:',gene_num,gene_none) - return fout - -def ml_tag_main(fin_pubtator,nlp_token, nn_model): - #print('.......senten split, tokenizer.........') - #print('...in...\n',fin_pubtator.getvalue()) - ori_text_newentity,token_text,token_out=ssplit_token(fin_pubtator,nlp_token) - #print('...token....\n',token_out.getvalue()) - - #2. filter nest entity - nonest_out=filter_nest(token_out) - #print(nonest_out.getvalue()) - - #3.ml tag - #print('.......machine learning-based tagging.........') - - ml_out=ml_tag(nonest_out, nn_model) - #print('.....ml.....\n',ml_out.getvalue()) - - #4. post processing - #print('.......post processing.........') - post_out=post_rule2(ori_text_newentity,token_text,ml_out) - #print('.........ori_text...............\n', ori_text_newentity) - #print('.....post.....\n',post_out.getvalue()) - return post_out +# -*- coding: utf-8 -*- +""" +Created on Wed Jun 8 11:52:42 2022 + +@author: luol2 +""" + +import io +import os +import argparse +import stanza +import sys +import re +import bioc +from src_python.SpeAss.ml_tagging_score_sa import NER_Tag + + +def ssplit_token(infile,nlp_token): + fin=io.StringIO(infile.getvalue()) + fout=io.StringIO() + # fout=open(outfile,'w',encoding='utf-8') + all_in=fin.read().strip().split('\n\n') + fin.close() + ori_text_newentity={} #{line[0]+line[1]:[all entity]} + entity_type=set() + token_text_new={}#{pmid:token_text} + for doc_text in all_in: + lines=doc_text.split('\n') + ori_text=lines[0].split('|t|')[1]+' '+lines[1].split('|a|')[1] + pmid=lines[0].split('|t|')[0] + # print(pmid) + entity_all=[] #[[seg0,seg1,...,],[]] + entity_all_ori=[] + entity_num=0 + + #first sort + doc_result={} + for i in range(2,len(lines)): + segs=lines[i].split('\t') + doc_result[lines[i]]=[int(segs[2]),int(segs[3])] + doc_result=sorted(doc_result.items(), key=lambda kv:(kv[1]), reverse=False) + doc_result_sort=[] + for ele in doc_result: + doc_result_sort.append(ele[0]) + + for i in range(0,len(doc_result_sort)): + seg=doc_result_sort[i].strip().split('\t') + entity_type.add(seg[5]) + # print(seg) + if len(seg)<=6:#Gene + entity_all_ori.append([seg[0],seg[1],'M'+str(entity_num),seg[2],seg[3],seg[4],seg[5],'-']) + entity_all.append([seg[0],seg[1],'M'+str(entity_num),seg[2],seg[3],seg[4],'Gene','-']) + entity_num+=1 + elif seg[-1].find('*')>=0:# *Species + entity_all_ori.append([seg[0],seg[1],'M'+str(entity_num),seg[2],seg[3],seg[4],seg[5],seg[6]]) + entity_all.append([seg[0],seg[1],'M'+str(entity_num),seg[2],seg[3],seg[4],'Species',seg[6]]) + entity_num+=1 + ori_text_newentity[lines[0]+'\n'+lines[1]]=entity_all_ori + # sys.exit() + + #ssplit token + doc_stanza = nlp_token(ori_text) + token_text='' + sentence_index=[] #[text_offset] + for sent in doc_stanza.sentences: + for word in sent.words: + if word.text.strip()=='': + # print('token is blank!') + pass + token_text+=word.text+' ' + token_text=token_text+'' #sentence split + sentence_index.append(len(token_text)) + + #ori_index map token_index + index_map=[-1]*len(ori_text) + j=0 + space_list=[' ',chr(160),chr(8201),chr(8194),chr(8197),chr(8202)] #空格有好几种,第一个是常用32,第二个shi 160,8201,8194,8197 + for i in range(0,len(ori_text)): + if ori_text[i] in space_list: + pass + elif ori_text[i]==token_text[j]: + index_map[i]=j + j+=1 + else: + j+=1 + temp_log=j + try: + while(ori_text[i]!=token_text[j]): + j+=1 + except: + print('doc',doc_text) + print('token_text:',token_text) + print('error:',ori_text[i-10:i+10],'i:',ori_text[i],'j:',token_text[temp_log],',',token_text[temp_log-10:temp_log+10]) + print(ord(ori_text[i]),ord(' ')) + sys.exit() + index_map[i]=j + j+=1 + # token_text=token_text.replace(' ','') + # print(token_text) + fout.write(token_text+'\n') + token_text_new[pmid]=token_text + entity_i=0 + cur_sent_i=0 + new_ente=0 + cur_sents=0 + cur_sente=sentence_index[0] + if entity_all!=[]: + bug_new_entity=[] + for entity_i in range(0,len(entity_all)): + new_ents=index_map[int(entity_all[entity_i][3])] + new_ente=index_map[int(entity_all[entity_i][4])-1]+1 + new_ent=token_text[new_ents:new_ente] + old_ent=entity_all[entity_i][5] + cur_sent_i=0 + cur_sents=0 + cur_sente=sentence_index[0] + while (not (max(new_ents,cur_sents) <= min(new_ente,cur_sente))) and (cur_sent_i=cur_sents and new_ente< cur_sente: + + if new_ent.replace(' ','') !=old_ent.replace(' ',''): + # print('entity error:',pmid,old_ent,new_ent,entity_all[entity_i][2],entity_all[entity_i][3]) + pass + fout.write(entity_all[entity_i][0]+'\t'+entity_all[entity_i][1]+'\t'+entity_all[entity_i][2]+'-'+str(cur_sent_i)+'\t'+str(new_ents)+'\t'+str(new_ente)+'\t'+new_ent+'\t'+entity_all[entity_i][6]+'\t'+entity_all[entity_i][7]+'\n') + entity_i+=1 + if entity_i>=len(entity_all): + break + new_ents=index_map[int(entity_all[entity_i][3])] + new_ente=index_map[int(entity_all[entity_i][4])-1]+1 + new_ent=token_text[new_ents:new_ente] + old_ent=entity_all[entity_i][5] + cur_sent_i+=1 + if cur_sent_i >= len(sentence_index): + break + cur_sents=sentence_index[cur_sent_i-1] + cur_sente=sentence_index[cur_sent_i] + """ + fout.write('\n') + # print(entity_type) + # fout.close() + return ori_text_newentity,token_text_new,fout + +def filter_nest(infile): #nonest + + # fin=open(infile,'r',encoding='utf-8') + # fout=open(outfile,'w',encoding='utf-8') + fin=io.StringIO(infile.getvalue()) + fout=io.StringIO() + + documents=fin.read().strip().split('\n\n') + fin.close() + total_entity=0 + over_entity=0 + nest_entity=0 + for doc in documents: + lines=doc.split('\n') + context=lines[0] + entity_list=[] + if len(lines)>1: + first_entity=lines[1].split('\t') + nest_list=[first_entity] + max_eid=int(first_entity[4]) + total_entity+=len(lines)-2 + for i in range(2,len(lines)): + segs=lines[i].split('\t') + if int(segs[3])> max_eid: + if len(nest_list)==1: + entity_list.append(nest_list[0]) + nest_list=[] + nest_list.append(segs) + if int(segs[4])>max_eid: + max_eid=int(segs[4]) + else: + # print(nest_list) + nest_entity+=len(nest_list)-1 + tem=find_max_entity(nest_list)#find max entity + # if len(tem)>1: + # print('max nest >1:',tem) + entity_list.extend(tem) + nest_list=[] + nest_list.append(segs) + if int(segs[4])>max_eid: + max_eid=int(segs[4]) + + else: + nest_list.append(segs) + if int(segs[4])>max_eid: + max_eid=int(segs[4]) + if nest_list!=[]: + if len(nest_list)==1: + entity_list.append(nest_list[0]) + + else: + tem=find_max_entity(nest_list)#find max entity + # if len(tem)>1: + # print('max nest >1:',tem) + entity_list.extend(tem) + fout.write(context+'\n') + for ele in entity_list: + fout.write('\t'.join(ele)+'\n') + fout.write('\n') + # print(total_entity,over_entity, nest_entity) + return fout +def find_max_entity(nest_list): + max_len=0 + final_tem=[] + max_index=0 + for i in range(0, len(nest_list)): + cur_len=int(nest_list[i][4])-int(nest_list[i][3]) + if cur_len>max_len: + max_len=cur_len + max_index=i + elif cur_len==max_len: + if nest_list[i][6] =='Gene': + max_index=i + # elif nest_list[i][5] =='Species': + # final_tem.append(nest_list[i]) + + final_tem.append(nest_list[max_index]) + return final_tem + + +# machine learning species assignment +def ml_tag(infile,nn_model): + + #tagging text + fin=io.StringIO(infile.getvalue()) + fout=io.StringIO() + # fin=open(infile,'r',encoding='utf-8') + all_in=fin.read().strip().split('\n\n') + fin.close() + + for doc in all_in: + pre_result,entity_all=NER_Tag(doc, nn_model) + for ele in entity_all: + ent_id=ele[3]+'-'+ele[4] + if ent_id in pre_result.keys(): + fout.write('\t'.join(ele)+'\t'+','.join(pre_result[ent_id])+'\n') + else: + fout.write('\t'.join(ele)+'\t-\n') + fout.write('\n') + + return fout + + +# details nearest+and +def post_rule1(ori_context,token_text,infile,outfile): + fin=open(infile,'r',encoding='utf-8') + fout=open(outfile,'w',encoding='utf-8') + pred_results={} #{pmid:{'M0':{'sent':'','offset':[sid,eid],'score':[[id,score],[id,score]]}}} #gene + species_index={} #{pmid:{sentid:[[spe_seg1],[spe_seg]]}} + mem_sent={} #{pmid:{'M0':sentid}} + gene_num=0 + gene_none=0 + for line in fin: + seg=line.strip().split('\t') + if len(seg)>1: + if seg[0] not in mem_sent.keys(): + _term_seg=seg[1].split('-') + mem_sent[seg[0]]={_term_seg[0]:_term_seg[1]} + else: + _term_seg=seg[1].split('-') + mem_sent[seg[0]][_term_seg[0]]=_term_seg[1] + if seg[5]=='Species': + if seg[0] not in species_index.keys(): + _sent_id=seg[1].split('-')[1] + species_index[seg[0]]={_sent_id:[seg]} + else: + _sent_id=seg[1].split('-')[1] + if _sent_id in species_index[seg[0]].keys(): + species_index[seg[0]][_sent_id].append(seg) + else: + species_index[seg[0]][_sent_id]=[seg] + else: + _pred_ids=seg[-1].split(',') + _temp_id_score=[] #[[spe_id,score]] + _sent_id=seg[1].split('-')[1] + for _pred_id in _pred_ids: + _temp_id_score.append(_pred_id.split('|')) + if seg[0] not in pred_results.keys(): + pred_results[seg[0]]={seg[1].split('-')[0]:{'sent':_sent_id,'offset':[seg[2],seg[3]],'score':_temp_id_score}} + else: + pred_results[seg[0]][seg[1].split('-')[0]]={'sent':_sent_id,'offset':[seg[2],seg[3]],'score':_temp_id_score} + #print(pred_results) + for pmid_text in ori_context.keys(): + #print(pmid_text) + lines=pmid_text.split('\n') + ori_text=lines[0].split('|t|')[1]+' '+lines[1].split('|a|')[1] + # print(ori_text) + fout.write(pmid_text+'\n') + pmid=lines[0].split('|t|')[0] + before_species=[] #nearest [eid,spe_id] + after_species=[] #nearest [sid,spe_id] + doc_specs=species_index[pmid] + #mul and spe + mul_and_spe=[] + for spe_sent in doc_specs.keys(): + last_id='' + new_diff_spe=[] + _temp_speid=set() + for ele in doc_specs[spe_sent]: + if ele[-2] !=last_id: + new_diff_spe.append(ele) + last_id =ele[-2] + _temp_speid.add(ele[-2]) + else: + + new_diff_spe.pop() + new_diff_spe.append(ele) + _temp_speid.add(ele[-2]) + last_id =ele[-2] + if len(new_diff_spe)==2: + spe_and_text=new_diff_spe[0][4]+' and '+new_diff_spe[1][4] + if ori_text.find(spe_and_text)>=0: + # print('old:',doc_specs[spe_sent]) + # print('new:',new_diff_spe) + # print('\n') + mul_and_spe=list(_temp_speid) + # print(mul_and_spe) + elif len(new_diff_spe)>2: + spe_and_text='' + for i in range(0,len(new_diff_spe)-1): + spe_and_text+=new_diff_spe[i][4]+', ' + spe_and_text1=spe_and_text[0:-2]+' and '+new_diff_spe[-1][4] + spe_and_text2=spe_and_text+'and '+new_diff_spe[-1][4] + if ori_text.find(spe_and_text1)>=0 or ori_text.find(spe_and_text2)>=0: + # print('old:',doc_specs[spe_sent]) + # print('new:',new_diff_spe) + mul_and_spe=list(_temp_speid) + # print(mul_and_spe) + Gene_type_list=['Gene','FamilyName','DomainMotif'] + for i,ele in enumerate(ori_context[pmid_text]): + #print(ele) + + if ele[5] in Gene_type_list: + gene_num+=1 + final_preds=set() + if ele[1] in pred_results[ele[0]].keys(): + temp_preds=pred_results[ele[0]][ele[1]]['score'] + + if temp_preds!=[['-']]: + if len(temp_preds)==1: + final_preds.add(temp_preds[0][0]) + else: + max_id='' + max_score=0 + for _temp_pred in temp_preds: + _score=float(_temp_pred[1]) + _id_ass=_temp_pred[0] + if len(mul_and_spe)>1: + if _score>0.5 and (_id_ass in mul_and_spe): + # print(_score) + final_preds.add(_id_ass) + if _score>max_score: + max_id=_id_ass + max_score=_score + if len(final_preds)==0: + final_preds.add(max_id) + # final_preds.add(multi_id) + else: #'-' nearst rule + gene_none+=1 + # print(mem_sent[ele[0]]) + _sent_id_gene=mem_sent[ele[0]][ele[1]] + + for j in range(i+1,len(ori_context[pmid_text])): + temp_seg=ori_context[pmid_text][j] + if temp_seg[5]=='Species': + after_species=[int(temp_seg[2]),temp_seg[6]] + break + # print(before_species,after_species) + # print(seg) + if before_species!=[] and after_species!=[]: + if len(ori_text[before_species[0]:int(ele[2])].split()) > len(ori_text[int(ele[3]):after_species[0]].split()): + final_preds.add(after_species[1]) + else: + final_preds.add(before_species[1]) + elif before_species==[]: + final_preds.add(after_species[1]) + elif after_species==[]: + final_preds.add(before_species[1]) + if len(final_preds)==0: + print('none pred!!!') + fout.write(ele[0]+'\t'+'\t'.join(ele[2:])+'\t'+','.join(final_preds)+'\n') + else: + # gene_none+=1 + # print(ele) + for j in range(i+1,len(ori_context[pmid_text])): + temp_seg=ori_context[pmid_text][j] + if temp_seg[5]=='Species': + after_species=[int(temp_seg[2]),temp_seg[6]] + break + # print(before_species,after_species) + # print(seg) + if before_species!=[] and after_species!=[]: + if len(ori_text[before_species[0]:int(ele[2])].split()) > len(ori_text[int(ele[3]):after_species[0]].split()): + final_preds.add(after_species[1]) + else: + final_preds.add(before_species[1]) + elif before_species==[]: + final_preds.add(after_species[1]) + elif after_species==[]: + final_preds.add(before_species[1]) + fout.write(ele[0]+'\t'+'\t'.join(ele[2:])+'\t'+','.join(final_preds)+'\n') + else: + fout.write(ele[0]+'\t'+'\t'.join(ele[2:])+'\t-\n') + before_species=[int(ele[3]),ele[6]] + fout.write('\n') + print('gene, none:',gene_num,gene_none) + fout.close() + +# major+and +def post_rule2(ori_context,token_text,infile): + # fin=open(infile,'r',encoding='utf-8') + # fout=open(outfile,'w',encoding='utf-8') + fin=io.StringIO(infile.getvalue()) + fout=io.StringIO() + pred_results={} #{pmid:{'M0':{'sent':'','offset':[sid,eid],'score':[[id,score],[id,score]]}}} #gene + species_index={} #{pmid:{sentid:[[spe_seg1],[spe_seg]]}} + species_count={}#{pmid:{speid:num}} + gene_num=0 + gene_none=0 + for line in fin: + seg=line.strip().split('\t') + if len(seg)>1: + if seg[6]=='Species': + if seg[0] not in species_count.keys(): + species_count[seg[0]]={seg[-2]:1} + else: + if seg[-2] not in species_count[seg[0]].keys(): + species_count[seg[0]][seg[-2]]=1 + else: + species_count[seg[0]][seg[-2]]+=1 + + if seg[0] not in species_index.keys(): + _sent_id=seg[2].split('-')[1] + species_index[seg[0]]={_sent_id:[seg]} + else: + _sent_id=seg[2].split('-')[1] + if _sent_id in species_index[seg[0]].keys(): + species_index[seg[0]][_sent_id].append(seg) + else: + species_index[seg[0]][_sent_id]=[seg] + else: + _pred_ids=seg[-1].split(',') + _temp_id_score=[] #[[spe_id,score]] + _sent_id=seg[2].split('-')[1] + for _pred_id in _pred_ids: + _temp_id_score.append(_pred_id.split('|')) + if seg[0] not in pred_results.keys(): + pred_results[seg[0]]={seg[2].split('-')[0]:{'sent':_sent_id,'offset':[seg[3],seg[4]],'score':_temp_id_score}} + else: + pred_results[seg[0]][seg[2].split('-')[0]]={'sent':_sent_id,'offset':[seg[3],seg[4]],'score':_temp_id_score} + fin.close() + #print(pred_results) + for pmid_text in ori_context.keys(): + #print(pmid_text) + lines=pmid_text.split('\n') + ori_text=lines[0].split('|t|')[1]+' '+lines[1].split('|a|')[1] + # print(ori_text) + fout.write(pmid_text+'\n') + pmid=lines[0].split('|t|')[0] + if pmid in species_count.keys(): + marjor_species = max(zip(species_count[pmid].values(), species_count[pmid].keys())) + else: + marjor_species = (1000,'*9606') + + if pmid in species_index.keys(): + doc_specs=species_index[pmid] + #mul and spe + mul_and_spe=[] + for spe_sent in doc_specs.keys(): + last_id='' + new_diff_spe=[] + _temp_speid=set() + for ele in doc_specs[spe_sent]: + if ele[-2] !=last_id: + new_diff_spe.append(ele) + last_id =ele[-2] + _temp_speid.add(ele[-2]) + else: + + new_diff_spe.pop() + new_diff_spe.append(ele) + _temp_speid.add(ele[-2]) + last_id =ele[-2] + if len(new_diff_spe)==2: + spe_and_text=new_diff_spe[0][5]+' and '+new_diff_spe[1][5] + if ori_text.find(spe_and_text)>=0: + # print('old:',doc_specs[spe_sent]) + # print('new:',new_diff_spe) + # print('\n') + mul_and_spe=list(_temp_speid) + # print(mul_and_spe) + elif len(new_diff_spe)>2: + spe_and_text='' + for i in range(0,len(new_diff_spe)-1): + spe_and_text+=new_diff_spe[i][5]+', ' + spe_and_text1=spe_and_text[0:-2]+' and '+new_diff_spe[-1][5] + spe_and_text2=spe_and_text+'and '+new_diff_spe[-1][5] + if ori_text.find(spe_and_text1)>=0 or ori_text.find(spe_and_text2)>=0: + # print('old:',doc_specs[spe_sent]) + # print('new:',new_diff_spe) + mul_and_spe=list(_temp_speid) + # print(mul_and_spe) + else: + mul_and_spe=[] + + Gene_type_list=['Gene','FamilyName','DomainMotif'] + for i,ele in enumerate(ori_context[pmid_text]): + #print(ele) + + if ele[6] in Gene_type_list: + gene_num+=1 + final_preds=set() + if (ele[0] in pred_results.keys()) and (ele[2] in pred_results[ele[0]].keys()): + temp_preds=pred_results[ele[0]][ele[2]]['score'] + + if temp_preds!=[['-']]: + if len(temp_preds)==1: + final_preds.add(temp_preds[0][0]) + else: + max_id='' + max_score=0 + for _temp_pred in temp_preds: + _score=float(_temp_pred[1]) + _id_ass=_temp_pred[0] + if len(mul_and_spe)>1: + if _score>0.5 and (_id_ass in mul_and_spe): + # print(_score) + final_preds.add(_id_ass) + if _score>max_score: + max_id=_id_ass + max_score=_score + if len(final_preds)==0: + final_preds.add(max_id) + # final_preds.add(multi_id) + else: #'-' major species + gene_none+=1 + # print(mem_sent[ele[0]]) + final_preds.add(marjor_species[1]) + if len(final_preds)==0: + print('none pred!!!') + fout.write(ele[0]+'\t'+ele[1]+'\t'+'\t'.join(ele[3:-1])+'\t'+','.join(final_preds).replace('*','')+'\n') + else: + final_preds.add(marjor_species[1]) + fout.write(ele[0]+'\t'+ele[1]+'\t'+'\t'.join(ele[3:-1])+'\t'+','.join(final_preds).replace('*','')+'\n') + else: + fout.write(ele[0]+'\t'+ele[1]+'\t'+'\t'.join(ele[3:])+'\n') + fout.write('\n') + # print('gene, none:',gene_num,gene_none) + return fout + +def ml_tag_main(fin_pubtator,nlp_token, nn_model): + #print('.......senten split, tokenizer.........') + #print('...in...\n',fin_pubtator.getvalue()) + ori_text_newentity,token_text,token_out=ssplit_token(fin_pubtator,nlp_token) + #print('...token....\n',token_out.getvalue()) + + #2. filter nest entity + nonest_out=filter_nest(token_out) + #print(nonest_out.getvalue()) + + #3.ml tag + #print('.......machine learning-based tagging.........') + + ml_out=ml_tag(nonest_out, nn_model) + #print('.....ml.....\n',ml_out.getvalue()) + + #4. post processing + #print('.......post processing.........') + post_out=post_rule2(ori_text_newentity,token_text,ml_out) + #print('.........ori_text...............\n', ori_text_newentity) + #print('.....post.....\n',post_out.getvalue()) + return post_out \ No newline at end of file diff --git a/tmBioC.key b/tmBioC.key index 67f2f95f7058eca66a3a8ec6ff7902de2d4c4a97..5feff7553f6a228c9522d22b1ae69c4a683efe4a 100755 --- a/tmBioC.key +++ b/tmBioC.key @@ -1,42 +1,42 @@ -PubTator.key - -A BioC format for PubTator and other NER tools (i.e., tmChem, DNorm, tmVar, SR4GN or GenNorm) developed at the Biomedical Text Mining group at NCBI -The goal of this collection is to provide easy access to the text and bio-concept annotations for PMC articles. - - collection: a group of PubMed documents, each document is organized into title, abstract and other passages - - source: PubMed, PubMed Central, etc. - - date: Document download date - - document: abstract, full-text article, free-text document, etc. - - id: PubMed ID (or other ID in a given collection) of the document - - passage: Title, abstract and other passages - - infon["type"]: "title", "abstract" and other passages - - offset: Title has an offset of zero, while the other passages (e.g., abstract) are assumed to begin after the previous passages and one space - - text: Text of the passage - - annotation: One bio-concept of the passage as determined by the tmChem, DNorm, tmVar, SR4GN or GenNorm - - infon["type"]: The type of bioconcept, e.g. "Gene", "Species", "Disease", "Chemical" or "Mutation" - - infon["MeSH"]: The bio-concept identifier in MeSH as detected by DNorm or tmChem - - infon["OMIM"]: The bio-concept identifier in OMIM as detected by DNorm - - infon["NCBI_Gene"]: The bio-concept identifier in NCBI Gene as detected by GenNorm - - infon["NCBI_Taxonomy"]: The bio-concept identifier in NCBI Taxonomy as detected by SR4GN - - infon["ChEBI"]: The bio-concept identifier in ChEBI as detected by tmChem - - infon["tmVar"]: The intelligent key generated artificially for the mention detected by tmVar (||||) - - location: location of the mention including the global document "offset" where a bio-concept is located and the "length" of the mention - - text: Mention of the bio-concept +PubTator.key + +A BioC format for PubTator and other NER tools (i.e., tmChem, DNorm, tmVar, SR4GN or GenNorm) developed at the Biomedical Text Mining group at NCBI +The goal of this collection is to provide easy access to the text and bio-concept annotations for PMC articles. + + collection: a group of PubMed documents, each document is organized into title, abstract and other passages + + source: PubMed, PubMed Central, etc. + + date: Document download date + + document: abstract, full-text article, free-text document, etc. + + id: PubMed ID (or other ID in a given collection) of the document + + passage: Title, abstract and other passages + + infon["type"]: "title", "abstract" and other passages + + offset: Title has an offset of zero, while the other passages (e.g., abstract) are assumed to begin after the previous passages and one space + + text: Text of the passage + + annotation: One bio-concept of the passage as determined by the tmChem, DNorm, tmVar, SR4GN or GenNorm + + infon["type"]: The type of bioconcept, e.g. "Gene", "Species", "Disease", "Chemical" or "Mutation" + + infon["MeSH"]: The bio-concept identifier in MeSH as detected by DNorm or tmChem + + infon["OMIM"]: The bio-concept identifier in OMIM as detected by DNorm + + infon["NCBI_Gene"]: The bio-concept identifier in NCBI Gene as detected by GenNorm + + infon["NCBI_Taxonomy"]: The bio-concept identifier in NCBI Taxonomy as detected by SR4GN + + infon["ChEBI"]: The bio-concept identifier in ChEBI as detected by tmChem + + infon["tmVar"]: The intelligent key generated artificially for the mention detected by tmVar (||||) + + location: location of the mention including the global document "offset" where a bio-concept is located and the "length" of the mention + + text: Mention of the bio-concept diff --git a/vocab/GeneNER_label.vocab b/vocab/GeneNER_label.vocab index a9ffab70e6ce1b49ca6376f83e2bf3479db78cd9..a262f5d85537ce70e0f1e24d52236729f76f13bb 100644 --- a/vocab/GeneNER_label.vocab +++ b/vocab/GeneNER_label.vocab @@ -1,5 +1,5 @@ -O -B-FamilyName -I-FamilyName -B-Gene -I-Gene +O +B-FamilyName +I-FamilyName +B-Gene +I-Gene diff --git a/vocab/SpeAss_IO_label.vocab b/vocab/SpeAss_IO_label.vocab index 3d1ef489a75fdb3c01dcc70fe7a25177a34d7326..6c9d144db07ffabac18a0e230e4190ca7091cc2f 100644 --- a/vocab/SpeAss_IO_label.vocab +++ b/vocab/SpeAss_IO_label.vocab @@ -1,2 +1,2 @@ -O -ARG2 +O +ARG2