File size: 1,993 Bytes
d5062c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
/*
Identify short form (sf) & long form (lf) pairs in free text using a
multi-stage algorithm.
Process one line at a time and print out:
line
  sf|lf|P-precision|strategy
*/

#include "AbbrvE.h"
#include "AbbrStra.h"
#include <vector>
#include <map>
#include <string>

using namespace std;
using namespace iret;

namespace iret {

/**  One abbreviation result: a short form (sf) / long form (lf) pair plus
     the strategy string and precision value stored alongside it.  **/
class AbbrOut {
public:
  std::string sf, lf, strat;  // short form, long form, strategy
  double prec;                // precision value; 0 until set by the caller

  // The strings default-construct to empty, so only prec needs a value here.
  AbbrOut( void ) : prec(0)
  {}

  /**  Writes "  sf|lf|prec" to out: two leading spaces, fields separated
       by '|', no trailing newline.  strat is not written.
       Declared const so results held by const reference can be printed.  **/
  void print ( std::ostream & out ) const {
    out << "  "  << sf << "|" << lf << "|" << prec;
  }

};


class Ab3P {
public:
  Ab3P( void );
  ~Ab3P(void) { delete wrdData; }

  /**  Collect text for later abbreviation finding. **/
  void add_text( const string & text ) {
    buffer += text;
  }
  void add_text( char * text ) {
    buffer += text;
  }

  /**  Sets abbrs to the abbreviations found in previous calls to add_text.
       Afterwords, resets the text buffer.  **/
  void get_abbrs( vector<AbbrOut> & abbrs ) {
    get_abbrs( buffer, abbrs );
    buffer = "";
  }

  /**  Sets abbrs to the abbreviations found in text
       Does not interfere with the add_text buffer.  **/
  void get_abbrs( const string & text, vector<AbbrOut> & abbrs ) {
    abbrs.clear();

    if(text.empty()) return; // skip empty line
    // const_cast need so correct get_abbrs get called,
    // otherwise, infinite loop
    get_abbrs( const_cast<char*>(text.c_str()), abbrs );
  }
  void get_abbrs( char * text, vector<AbbrOut> & abbrs );

  /**  Try a potential sf-lf form to find proper lf, strategy used,
       and pseudo-precision of result **/
  void try_pair( char * sf, char * lf, AbbrOut & abbr );

  /**
     psf -- pointer short form
     plf -- pointer long form
  **/
  void try_strats ( char * psf, char * plf, bool swap, AbbrOut & result );

  AbbrvE ab; //default # pairs = 10,000 
  map<string, double> stratPrec;
  StratUtil util;
  WordData *wrdData;            //set data needed for AbbrStra
  string buffer;                // collect text for later use
};

}