Spaces:
Sleeping
Sleeping
/* | |
* esa.hxx | |
* Copyright (c) 2010 Daisuke Okanohara All Rights Reserved. | |
* | |
* Permission is hereby granted, free of charge, to any person | |
* obtaining a copy of this software and associated documentation | |
* files (the "Software"), to deal in the Software without | |
* restriction, including without limitation the rights to use, | |
* copy, modify, merge, publish, distribute, sublicense, and/or sell | |
* copies of the Software, and to permit persons to whom the | |
* Software is furnished to do so, subject to the following | |
* conditions: | |
* | |
* The above copyright notice and this permission notice shall be | |
* included in all copies or substantial portions of the Software. | |
* | |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
* OTHER DEALINGS IN THE SOFTWARE. | |
*/ | |
namespace esaxx_private { | |
template<typename string_type, typename sarray_type, typename index_type> | |
index_type suffixtree(string_type T, sarray_type SA, sarray_type L, sarray_type R, sarray_type D, index_type n){ | |
if (n == 0){ | |
return 0; | |
} | |
sarray_type Psi = L; | |
Psi[SA[0]] = SA[n-1]; | |
for (index_type i = 1; i < n; ++i){ | |
Psi[SA[i]] = SA[i-1]; | |
} | |
// Compare at most 2n log n charcters. Practically fastest | |
// "Permuted Longest-Common-Prefix Array", Juha Karkkainen, CPM 09 | |
sarray_type PLCP = R; | |
index_type h = 0; | |
for (index_type i = 0; i < n; ++i){ | |
index_type j = Psi[i]; | |
while (i+h < n && j+h < n && | |
T[i+h] == T[j+h]){ | |
++h; | |
} | |
PLCP[i] = h; | |
if (h > 0) --h; | |
} | |
sarray_type H = L; | |
for (index_type i = 0; i < n; ++i){ | |
H[i] = PLCP[SA[i]]; | |
} | |
H[0] = -1; | |
std::vector<std::pair<index_type, index_type> > S; | |
S.push_back(std::make_pair((index_type)-1, (index_type)-1)); | |
size_t nodeNum = 0; | |
for (index_type i = 0; ; ++i){ | |
std::pair<index_type, index_type> cur (i, (i == n) ? -1 : H[i]); | |
std::pair<index_type, index_type> cand(S.back()); | |
while (cand.second > cur.second){ | |
if (i - cand.first > 1){ | |
L[nodeNum] = cand.first; | |
R[nodeNum] = i; | |
D[nodeNum] = cand.second; | |
++nodeNum; | |
} | |
cur.first = cand.first; | |
S.pop_back(); | |
cand = S.back(); | |
} | |
if (cand.second < cur.second){ | |
S.push_back(cur); | |
} | |
if (i == n) break; | |
S.push_back(std::make_pair(i, n - SA[i] + 1)); | |
} | |
return nodeNum; | |
} | |
} | |
/** | |
* @brief Build an enhanced suffix array of a given string in linear time | |
* For an input text T, esaxx() builds an enhancd suffix array in linear time. | |
* i-th internal node is represented as a triple (L[i], R[i], D[i]); | |
* L[i] and R[i] is the left/right boundary of the suffix array as SA[L[i]....R[i]-1] | |
* D[i] is the depth of the internal node | |
* The number of internal node is at most N-1 and return the actual number by | |
* @param T[0...n-1] The input string. (random access iterator) | |
* @param SA[0...n-1] The output suffix array (random access iterator) | |
* @param L[0...n-1] The output left boundary of internal node (random access iterator) | |
* @param R[0...n-1] The output right boundary of internal node (random access iterator) | |
* @param D[0...n-1] The output depth of internal node (random access iterator) | |
* @param n The length of the input string | |
* @param k The alphabet size | |
* @pram nodeNum The output the number of internal node | |
* @return 0 if succeded, -1 or -2 otherwise | |
*/ | |
template<typename string_type, typename sarray_type, typename index_type> | |
int esaxx(string_type T, sarray_type SA, sarray_type L, sarray_type R, sarray_type D, | |
index_type n, index_type k, index_type& nodeNum) { | |
if ((n < 0) || (k <= 0)) return -1; | |
int err = saisxx(T, SA, n, k); | |
if (err != 0){ | |
return err; | |
} | |
nodeNum = esaxx_private::suffixtree(T, SA, L, R, D, n); | |
return 0; | |
} | |