利用前缀表跳过无效比较
KMP(Knuth-Morris-Pratt)算法是一种高效的字符串匹配算法。它利用已匹配部分的信息,避免从头开始重新匹配,通过"前缀表"(也叫next数组)实现O(n+m)的时间复杂度。
例如 "ABAB" 的最长相等前后缀是 "AB",长度为2。
| 阶段 | 复杂度 | 说明 |
|---|---|---|
| 构建前缀表 | O(M) | M是模式串长度 |
| 匹配过程 | O(N) | N是文本串长度 |
| 总时间 | O(N+M) | 远优于朴素算法的O(N×M) |
| 空间复杂度 | O(M) | 存储前缀表 |
def build_next(pattern): """构建前缀表(Next数组)""" m = len(pattern) next_arr = [0] * m j = 0 for i in range(1, m): while j > 0 and pattern[i] != pattern[j]: j = next_arr[j - 1] if pattern[i] == pattern[j]: j += 1 next_arr[i] = j return next_arr def kmp_search(text, pattern): """KMP字符串匹配""" if not pattern: return [] next_arr = build_next(pattern) results = [] j = 0 for i in range(len(text)): while j > 0 and text[i] != pattern[j]: j = next_arr[j - 1] if text[i] == pattern[j]: j += 1 if j == len(pattern): results.append(i - j + 1) j = next_arr[j - 1] return results # 使用示例 text = "ABABDABACDABABCABAB" pattern = "ABABCABAB" positions = kmp_search(text, pattern) print(f"匹配位置: {positions}") # 输出: 匹配位置: [10]
import java.util.*; public class KMP { /** * 构建前缀表(Next数组) */ private static int[] buildNext(String pattern) { int m = pattern.length(); int[] next = new int[m]; int j = 0; for (int i = 1; i < m; i++) { while (j > 0 && pattern.charAt(i) != pattern.charAt(j)) { j = next[j - 1]; } if (pattern.charAt(i) == pattern.charAt(j)) { j++; } next[i] = j; } return next; } /** * KMP字符串匹配 */ public static List<Integer> kmpSearch(String text, String pattern) { List<Integer> results = new ArrayList<>(); if (pattern.isEmpty()) return results; int[] next = buildNext(pattern); int j = 0; for (int i = 0; i < text.length(); i++) { while (j > 0 && text.charAt(i) != pattern.charAt(j)) { j = next[j - 1]; } if (text.charAt(i) == pattern.charAt(j)) { j++; } if (j == pattern.length()) { results.add(i - j + 1); j = next[j - 1]; } } return results; } public static void main(String[] args) { String text = "ABABDABACDABABCABAB"; String pattern = "ABABCABAB"; List<Integer> positions = kmpSearch(text, pattern); System.out.println("匹配位置: " + positions); // 输出: 匹配位置: [10] } }