正如马丁所说,您可以在字符串匹配中实现一些众所周知的最快算法,高德纳-莫里斯-普拉特字符串搜索算法(或KMP算法)搜索“单词”的出现W
在主“文本字符串”内S
.
算法有复杂度O(n), where n的长度是S
和O is 大 O 表示法.
extension String {
// Build pi function of prefixes
private func build_pi(str: String) -> [Int] {
var n = count(str)
var pi = Array(count: n + 1, repeatedValue: 0)
var k = -1
pi[0] = -1
for (var i = 0; i < n; ++i) {
while (k >= 0 && str[k] != str[i]) {
k = pi[k]
}
pi[i + 1] = ++k
}
return pi
}
// Knuth-Morris Pratt algorithm
func searchPattern(pattern: String) -> [Int] {
var matches = [Int]()
var n = count(self)
var m = count(pattern)
var k = 0
var pi = build_pi(pattern)
for var i = 0; i < n; ++i {
while (k >= 0 && (k == m || pattern[k] != self[i])) {
k = pi[k]
}
if ++k == m {
matches.append(i - m + 1)
}
}
return matches
}
subscript (i: Int) -> Character {
return self[advance(self.startIndex, i)]
}
}
然后您可以通过以下方式使用它:
var string = "apurba mandal loves ayoshi loves"
var pattern = "loves"
println(string.searchPattern(pattern))
输出应该是:
[14, 27]
它属于字符串内模式出现的起始索引。我希望这对你有帮助。
EDIT:
正如马丁在他的评论中所说,你需要避免使用advance
索引函数String
by an Int
因为它是O(到索引的位置).
一种可能的解决方案是将String
到一个数组Character
然后访问索引是O(1).
然后extension
可以改成这个:
extension String {
// Build pi function of prefixes
private func build_pi(str: [Character]) -> [Int] {
var n = count(str)
var pi = Array(count: n + 1, repeatedValue: 0)
var k = -1
pi[0] = -1
for (var i = 0; i < n; ++i) {
while (k >= 0 && str[k] != str[i]) {
k = pi[k]
}
pi[i + 1] = ++k
}
return pi
}
// Knuth-Morris Pratt algorithm
func searchPattern(pattern: String) -> [Int] {
// Convert to Character array to index in O(1)
var patt = Array(pattern)
var S = Array(self)
var matches = [Int]()
var n = count(self)
var m = count(pattern)
var k = 0
var pi = build_pi(patt)
for var i = 0; i < n; ++i {
while (k >= 0 && (k == m || patt[k] != S[i])) {
k = pi[k]
}
if ++k == m {
matches.append(i - m + 1)
}
}
return matches
}
}