1//! @file match.cpp
2//! @author ryftchen
3//! @brief The definitions (match) in the algorithm module.
4//! @version 0.1.0
5//! @copyright Copyright (c) 2022-2026 ryftchen. All rights reserved.
6
7#include "match.hpp"
8
9#include <cstring>
10#include <vector>
11
12namespace algorithm::match
13{
14//! @brief Function version number.
15//! @return version number (major.minor.patch)
16const char* version() noexcept
17{
18 static const char* const ver = "0.1.0";
19 return ver;
20}
21
22std::int64_t Match::rk(
23 const unsigned char* const text,
24 const unsigned char* const pattern,
25 const std::uint32_t textLen,
26 const std::uint32_t patternLen)
27{
28 if (!text || !pattern || (textLen == 0) || (patternLen == 0) || (textLen < patternLen))
29 {
30 return -1;
31 }
32
33 std::int64_t shift = -1;
34 constexpr std::int64_t rollingHashBase = maxASCII;
35 constexpr std::int64_t rollingHashMod = 1e9 + 7;
36 std::int64_t textHash = 0;
37 std::int64_t patternHash = 0;
38 std::int64_t pow = 1;
39
40 for (std::uint32_t i = 0; i < (patternLen - 1); ++i)
41 {
42 pow = (pow * rollingHashBase) % rollingHashMod;
43 }
44
45 for (std::uint32_t i = 0; i < patternLen; ++i)
46 {
47 patternHash = (rollingHashBase * patternHash + pattern[i]) % rollingHashMod;
48 textHash = (rollingHashBase * textHash + text[i]) % rollingHashMod;
49 }
50
51 for (std::uint32_t i = 0; i <= (textLen - patternLen); ++i)
52 {
53 if ((patternHash == textHash) && (std::memcmp(s1: text + i, s2: pattern, n: patternLen) == 0))
54 {
55 shift = i;
56 break;
57 }
58
59 if (i < (textLen - patternLen))
60 {
61 textHash = (rollingHashBase * (textHash - text[i] * pow) + text[i + patternLen]) % rollingHashMod;
62 if (textHash < 0)
63 {
64 textHash += rollingHashMod;
65 }
66 }
67 }
68 return shift;
69}
70
71std::int64_t Match::kmp(
72 const unsigned char* const text,
73 const unsigned char* const pattern,
74 const std::uint32_t textLen,
75 const std::uint32_t patternLen)
76{
77 if (!text || !pattern || (textLen == 0) || (patternLen == 0) || (textLen < patternLen))
78 {
79 return -1;
80 }
81
82 std::int64_t shift = -1;
83 std::vector<std::uint32_t> next(patternLen + 1, 0);
84
85 for (std::uint32_t i = 1; i < patternLen; ++i)
86 {
87 std::uint32_t j = next[i + 1];
88 while ((j > 0) && (pattern[j] != pattern[i]))
89 {
90 j = next[j];
91 }
92
93 if ((j > 0) || (pattern[j] == pattern[i]))
94 {
95 next[i + 1] = j + 1;
96 }
97 }
98
99 for (std::uint32_t i = 0, j = 0; i < textLen; ++i)
100 {
101 if (*(text + i) == *(pattern + j))
102 {
103 if (++j == patternLen)
104 {
105 shift = i - j + 1;
106 break;
107 }
108 }
109 else if (j > 0)
110 {
111 j = next[j];
112 --i;
113 }
114 }
115 return shift;
116}
117
118std::int64_t Match::bm(
119 const unsigned char* const text,
120 const unsigned char* const pattern,
121 const std::uint32_t textLen,
122 const std::uint32_t patternLen)
123{
124 if (!text || !pattern || (textLen == 0) || (patternLen == 0) || (textLen < patternLen))
125 {
126 return -1;
127 }
128
129 std::int64_t shift = -1;
130 std::array<std::uint32_t, maxASCII> badCharRuleTable{};
131 std::array<std::uint32_t, maxASCII> goodSuffixIndexTable{};
132 fillBadCharRuleTable(badCharRuleTable, pattern, patternLen);
133 fillGoodSuffixRuleTable(goodSuffixRuleTable&: goodSuffixIndexTable, pattern, patternLen);
134
135 std::uint32_t textIdx = patternLen - 1;
136 while (textIdx < textLen)
137 {
138 std::uint32_t patternIdx = patternLen - 1;
139 while ((patternIdx > 0) && (text[textIdx] == pattern[patternIdx]))
140 {
141 --textIdx;
142 --patternIdx;
143 }
144 if ((patternIdx == 0) && (text[textIdx] == pattern[patternIdx]))
145 {
146 shift = textIdx;
147 break;
148 }
149
150 textIdx += std::max(a: badCharRuleTable[text[textIdx]], b: goodSuffixIndexTable[patternIdx]);
151 }
152 return shift;
153}
154
155void Match::fillBadCharRuleTable(
156 std::array<std::uint32_t, maxASCII>& badCharRuleTable,
157 const unsigned char* const pattern,
158 const std::uint32_t patternLen)
159{
160 badCharRuleTable.fill(u: patternLen);
161
162 for (std::uint32_t i = 0; i < patternLen; ++i)
163 {
164 badCharRuleTable[pattern[i]] = patternLen - 1 - i;
165 }
166}
167
168void Match::fillGoodSuffixRuleTable(
169 std::array<std::uint32_t, maxASCII>& goodSuffixRuleTable,
170 const unsigned char* const pattern,
171 const std::uint32_t patternLen)
172{
173 std::uint32_t lastPrefixIdx = 1;
174 for (std::int64_t pos = (patternLen - 1); pos >= 0; --pos)
175 {
176 const std::uint32_t suffixLen = patternLen - (pos + 1);
177 bool isPrefix = true;
178 for (std::uint32_t i = 0; i < suffixLen; ++i)
179 {
180 if (pattern[i] != pattern[pos + 1 + i])
181 {
182 isPrefix = false;
183 break;
184 }
185 }
186 if (isPrefix)
187 {
188 lastPrefixIdx = pos + 1;
189 }
190
191 goodSuffixRuleTable[pos] = lastPrefixIdx + (patternLen - 1 - pos);
192 }
193
194 for (std::uint32_t pos = 0; pos < (patternLen - 1); ++pos)
195 {
196 std::uint32_t suffixLen = 0;
197 while ((suffixLen <= pos) && (suffixLen <= (patternLen - 1))
198 && (pattern[pos - suffixLen] == pattern[patternLen - 1 - suffixLen]))
199 {
200 ++suffixLen;
201 }
202 if ((suffixLen <= pos) && (suffixLen <= (patternLen - 1))
203 && (pattern[pos - suffixLen] != pattern[patternLen - 1 - suffixLen]))
204 {
205 goodSuffixRuleTable[patternLen - 1 - suffixLen] = patternLen - 1 - pos + suffixLen;
206 }
207 }
208}
209
210std::int64_t Match::horspool(
211 const unsigned char* const text,
212 const unsigned char* const pattern,
213 const std::uint32_t textLen,
214 const std::uint32_t patternLen)
215{
216 if (!text || !pattern || (textLen == 0) || (patternLen == 0) || (textLen < patternLen))
217 {
218 return -1;
219 }
220
221 std::int64_t shift = -1;
222 std::array<std::uint32_t, maxASCII> badCharShiftTable{};
223 fillHorspoolBadCharShiftTable(badCharShiftTable, pattern, patternLen);
224
225 std::uint32_t moveLen = patternLen - 1;
226 while (moveLen <= (textLen - 1))
227 {
228 std::uint32_t matchLen = 0;
229 while ((matchLen <= (patternLen - 1)) && (pattern[patternLen - 1 - matchLen] == text[moveLen - matchLen]))
230 {
231 ++matchLen;
232 }
233
234 if (matchLen == patternLen)
235 {
236 shift = moveLen - patternLen + 1;
237 break;
238 }
239
240 moveLen += badCharShiftTable[text[moveLen]];
241 }
242 return shift;
243}
244
245void Match::fillHorspoolBadCharShiftTable(
246 std::array<std::uint32_t, maxASCII>& badCharShiftTable,
247 const unsigned char* const pattern,
248 const std::uint32_t patternLen)
249{
250 badCharShiftTable.fill(u: patternLen);
251
252 for (std::uint32_t i = 0; i < (patternLen - 1); ++i)
253 {
254 badCharShiftTable[pattern[i]] = patternLen - 1 - i;
255 }
256}
257
258std::int64_t Match::sunday(
259 const unsigned char* const text,
260 const unsigned char* const pattern,
261 const std::uint32_t textLen,
262 const std::uint32_t patternLen)
263{
264 if (!text || !pattern || (textLen == 0) || (patternLen == 0) || (textLen < patternLen))
265 {
266 return -1;
267 }
268
269 std::int64_t shift = -1;
270 std::array<std::uint32_t, maxASCII> badCharShiftTable{};
271 fillSundayBadCharShiftTable(badCharShiftTable, pattern, patternLen);
272
273 std::uint32_t textIdx = 0;
274 while (textIdx <= (textLen - patternLen))
275 {
276 std::uint32_t matchLen = 0;
277 while (text[textIdx + matchLen] == pattern[matchLen])
278 {
279 ++matchLen;
280 if (matchLen == patternLen)
281 {
282 shift = textIdx;
283 return shift;
284 }
285 }
286
287 textIdx += badCharShiftTable[text[textIdx + patternLen]];
288 }
289 return shift;
290}
291
292void Match::fillSundayBadCharShiftTable(
293 std::array<std::uint32_t, maxASCII>& badCharShiftTable,
294 const unsigned char* const pattern,
295 const std::uint32_t patternLen)
296{
297 badCharShiftTable.fill(u: patternLen + 1);
298
299 for (std::uint32_t i = 0; i < patternLen; ++i)
300 {
301 badCharShiftTable[pattern[i]] = patternLen - i;
302 }
303}
304} // namespace algorithm::match
305