1//! @file match.cpp
2//! @author ryftchen
3//! @brief The definitions (match) in the algorithm module.
4//! @version 0.1.0
5//! @copyright Copyright (c) 2022-2025 ryftchen. All rights reserved.
6
7#include "match.hpp"
8
9#include <cstring>
10#include <vector>
11
12namespace algorithm::match
13{
14//! @brief Function version number.
15//! @return version number (major.minor.patch)
16const char* version() noexcept
17{
18 static const char* const ver = "0.1.0";
19 return ver;
20}
21
22std::int64_t Match::rk(
23 const unsigned char* const text,
24 const unsigned char* const pattern,
25 const std::uint32_t textLen,
26 const std::uint32_t patternLen)
27{
28 if (!text || !pattern || (textLen == 0) || (patternLen == 0) || (textLen < patternLen))
29 {
30 return -1;
31 }
32
33 std::int64_t shift = -1;
34 constexpr std::int64_t rollingHashBase = maxASCII;
35 constexpr std::int64_t rollingHashMod = 1e9 + 7;
36 std::int64_t textHash = 0;
37 std::int64_t patternHash = 0;
38 std::int64_t pow = 1;
39
40 for (std::uint32_t i = 0; i < (patternLen - 1); ++i)
41 {
42 pow = (pow * rollingHashBase) % rollingHashMod;
43 }
44
45 for (std::uint32_t i = 0; i < patternLen; ++i)
46 {
47 patternHash = (rollingHashBase * patternHash + pattern[i]) % rollingHashMod;
48 textHash = (rollingHashBase * textHash + text[i]) % rollingHashMod;
49 }
50
51 for (std::uint32_t i = 0; i <= (textLen - patternLen); ++i)
52 {
53 if ((patternHash == textHash) && (std::memcmp(s1: text + i, s2: pattern, n: patternLen) == 0))
54 {
55 shift = i;
56 break;
57 }
58
59 if (i < (textLen - patternLen))
60 {
61 textHash = (rollingHashBase * (textHash - text[i] * pow) + text[i + patternLen]) % rollingHashMod;
62 if (textHash < 0)
63 {
64 textHash += rollingHashMod;
65 }
66 }
67 }
68 return shift;
69}
70
71std::int64_t Match::kmp(
72 const unsigned char* const text,
73 const unsigned char* const pattern,
74 const std::uint32_t textLen,
75 const std::uint32_t patternLen)
76{
77 if (!text || !pattern || (textLen == 0) || (patternLen == 0) || (textLen < patternLen))
78 {
79 return -1;
80 }
81
82 std::int64_t shift = -1;
83 std::vector<std::uint32_t> next(patternLen + 1, 0);
84
85 for (std::uint32_t i = 1; i < patternLen; ++i)
86 {
87 std::uint32_t j = next[i + 1];
88 while ((j > 0) && (pattern[j] != pattern[i]))
89 {
90 j = next[j];
91 }
92
93 if ((j > 0) || (pattern[j] == pattern[i]))
94 {
95 next[i + 1] = j + 1;
96 }
97 }
98
99 for (std::uint32_t i = 0, j = 0; i < textLen; ++i)
100 {
101 if (*(text + i) == *(pattern + j))
102 {
103 if (++j == patternLen)
104 {
105 shift = i - j + 1;
106 break;
107 }
108 }
109 else if (j > 0)
110 {
111 j = next[j];
112 --i;
113 }
114 }
115 return shift;
116}
117
118std::int64_t Match::bm(
119 const unsigned char* const text,
120 const unsigned char* const pattern,
121 const std::uint32_t textLen,
122 const std::uint32_t patternLen)
123{
124 if (!text || !pattern || (textLen == 0) || (patternLen == 0) || (textLen < patternLen))
125 {
126 return -1;
127 }
128
129 std::int64_t shift = -1;
130 std::uint32_t badCharRuleTable[maxASCII] = {};
131 std::uint32_t goodSuffixIndexTable[maxASCII] = {};
132
133 fillBadCharRuleTable(badCharRuleTable, pattern, patternLen);
134 fillGoodSuffixRuleTable(goodSuffixRuleTable: goodSuffixIndexTable, pattern, patternLen);
135
136 std::uint32_t textIdx = patternLen - 1;
137 while (textIdx < textLen)
138 {
139 std::uint32_t patternIdx = patternLen - 1;
140 while ((patternIdx > 0) && (text[textIdx] == pattern[patternIdx]))
141 {
142 --textIdx;
143 --patternIdx;
144 }
145 if ((patternIdx == 0) && (text[textIdx] == pattern[patternIdx]))
146 {
147 shift = textIdx;
148 break;
149 }
150
151 textIdx += std::max(a: badCharRuleTable[text[textIdx]], b: goodSuffixIndexTable[patternIdx]);
152 }
153 return shift;
154}
155
156void Match::fillBadCharRuleTable(
157 std::uint32_t badCharRuleTable[], const unsigned char* const pattern, const std::uint32_t patternLen)
158{
159 for (std::uint16_t i = 0; i < maxASCII; ++i)
160 {
161 badCharRuleTable[i] = patternLen;
162 }
163
164 for (std::uint32_t j = 0; j < patternLen; ++j)
165 {
166 badCharRuleTable[pattern[j]] = patternLen - 1 - j;
167 }
168}
169
170void Match::fillGoodSuffixRuleTable(
171 std::uint32_t goodSuffixRuleTable[], const unsigned char* const pattern, const std::uint32_t patternLen)
172{
173 std::uint32_t lastPrefixIdx = 1;
174 for (std::int64_t pos = (patternLen - 1); pos >= 0; --pos)
175 {
176 const std::uint32_t suffixLen = patternLen - (pos + 1);
177 bool isPrefix = true;
178 for (std::uint32_t i = 0; i < suffixLen; ++i)
179 {
180 if (pattern[i] != pattern[pos + 1 + i])
181 {
182 isPrefix = false;
183 break;
184 }
185 }
186 if (isPrefix)
187 {
188 lastPrefixIdx = pos + 1;
189 }
190
191 goodSuffixRuleTable[pos] = lastPrefixIdx + (patternLen - 1 - pos);
192 }
193
194 for (std::uint32_t pos = 0; pos < (patternLen - 1); ++pos)
195 {
196 std::uint32_t suffixLen = 0;
197 while ((suffixLen <= pos) && (suffixLen <= (patternLen - 1))
198 && (pattern[pos - suffixLen] == pattern[patternLen - 1 - suffixLen]))
199 {
200 ++suffixLen;
201 }
202 if ((suffixLen <= pos) && (suffixLen <= (patternLen - 1))
203 && (pattern[pos - suffixLen] != pattern[patternLen - 1 - suffixLen]))
204 {
205 goodSuffixRuleTable[patternLen - 1 - suffixLen] = patternLen - 1 - pos + suffixLen;
206 }
207 }
208}
209
210std::int64_t Match::horspool(
211 const unsigned char* const text,
212 const unsigned char* const pattern,
213 const std::uint32_t textLen,
214 const std::uint32_t patternLen)
215{
216 if (!text || !pattern || (textLen == 0) || (patternLen == 0) || (textLen < patternLen))
217 {
218 return -1;
219 }
220
221 std::int64_t shift = -1;
222 std::uint32_t badCharShiftTable[maxASCII] = {};
223
224 fillHorspoolBadCharShiftTable(badCharShiftTable, pattern, patternLen);
225
226 std::uint32_t moveLen = patternLen - 1;
227 while (moveLen <= (textLen - 1))
228 {
229 std::uint32_t matchLen = 0;
230 while ((matchLen <= (patternLen - 1)) && (pattern[patternLen - 1 - matchLen] == text[moveLen - matchLen]))
231 {
232 ++matchLen;
233 }
234
235 if (matchLen == patternLen)
236 {
237 shift = moveLen - patternLen + 1;
238 break;
239 }
240
241 moveLen += badCharShiftTable[text[moveLen]];
242 }
243 return shift;
244}
245
246void Match::fillHorspoolBadCharShiftTable(
247 std::uint32_t badCharShiftTable[], const unsigned char* const pattern, const std::uint32_t patternLen)
248{
249 for (std::uint16_t i = 0; i < maxASCII; ++i)
250 {
251 badCharShiftTable[i] = patternLen;
252 }
253
254 for (std::uint32_t j = 0; j < (patternLen - 1); ++j)
255 {
256 badCharShiftTable[pattern[j]] = patternLen - 1 - j;
257 }
258}
259
260std::int64_t Match::sunday(
261 const unsigned char* const text,
262 const unsigned char* const pattern,
263 const std::uint32_t textLen,
264 const std::uint32_t patternLen)
265{
266 if (!text || !pattern || (textLen == 0) || (patternLen == 0) || (textLen < patternLen))
267 {
268 return -1;
269 }
270
271 std::int64_t shift = -1;
272 std::uint32_t badCharShiftTable[maxASCII] = {};
273
274 fillSundayBadCharShiftTable(badCharShiftTable, pattern, patternLen);
275
276 std::uint32_t textIdx = 0;
277 while (textIdx <= (textLen - patternLen))
278 {
279 std::uint32_t matchLen = 0;
280 while (text[textIdx + matchLen] == pattern[matchLen])
281 {
282 ++matchLen;
283 if (matchLen == patternLen)
284 {
285 shift = textIdx;
286 return shift;
287 }
288 }
289
290 textIdx += badCharShiftTable[text[textIdx + patternLen]];
291 }
292 return shift;
293}
294
295void Match::fillSundayBadCharShiftTable(
296 std::uint32_t badCharShiftTable[], const unsigned char* const pattern, const std::uint32_t patternLen)
297{
298 for (std::uint16_t i = 0; i < maxASCII; ++i)
299 {
300 badCharShiftTable[i] = patternLen + 1;
301 }
302
303 for (std::uint32_t j = 0; j < patternLen; ++j)
304 {
305 badCharShiftTable[pattern[j]] = patternLen - j;
306 }
307}
308} // namespace algorithm::match
309