PythonTrie前缀树实现
============================================================
Python Trie前缀树实现 — 节点定义/插入搜索/自动补全/通配符
============================================================
Trie(前缀树/字典树)是一种高效的字符串检索结构,利用字符串的公共前缀来
减少查询时间。广泛应用于自动补全、拼写检查、IP路由等场景。
============================================================
1. Trie 节点定义
============================================================
class TrieNode:
"""前缀树节点类"""
def __init__(self):
self.children = {} # 子节点字典 {字符: TrieNode}
self.is_end = False # 标记此节点是否为某个单词的结尾
self.count = 0 # 经过此节点的单词数量(用于统计)
class Trie:
"""前缀树类"""
def __init__(self):
self.root = TrieNode()
============================================================
2. 插入操作(Insert)
============================================================
def insert(self, word):
"""向Trie中插入一个单词"""
node = self.root
for ch in word:
if ch not in node.children:
node.children[ch] = TrieNode() # 创建新节点
node = node.children[ch]
node.count += 1 # 经过此节点的单词数+1
node.is_end = True # 标记单词结尾
============================================================
3. 搜索操作(Search)
============================================================
def search(self, word):
"""精确搜索:判断word是否在Trie中"""
node = self._find_node(word)
return node is not None and node.is_end
def _find_node(self, prefix):
"""找到前缀对应的最后一个节点(辅助方法)"""
node = self.root
for ch in prefix:
if ch not in node.children:
return None # 前缀不存在
node = node.children[ch]
return node
def starts_with(self, prefix):
"""判断是否存在以prefix为前缀的单词"""
return self._find_node(prefix) is not None
============================================================
4. 删除操作(Delete)
============================================================
def delete(self, word):
"""从Trie中删除一个单词"""
def _delete(node, word, depth):
"""递归删除,自底向上清理无用节点"""
if depth == len(word):
if not node.is_end: # 单词不存在
return False
node.is_end = False # 取消单词结尾标记
return len(node.children) == 0 # 如果是叶子节点则返回True
ch = word[depth]
if ch not in node.children:
return False # 单词不存在
should_delete = _delete(node.children[ch], word, depth + 1)
if should_delete:
del node.children[ch] # 删除无用节点
node.count -= 1
return len(node.children) == 0 and not node.is_end
node.count -= 1
return False
_delete(self.root, word, 0)
============================================================
5. 自动补全实现(Auto-Complete)
============================================================
def auto_complete(self, prefix):
"""返回所有以prefix开头的单词"""
node = self._find_node(prefix)
if not node:
return [] # 没有匹配前缀
results = []
self._dfs_collect(node, list(prefix), results)
return results
def _dfs_collect(self, node, path_chars, results):
"""DFS收集所有以当前节点为前缀的完整单词"""
if node.is_end:
results.append(''.join(path_chars)) # 当前路径是一个完整单词
for ch in sorted(node.children): # 按字母顺序遍历
path_chars.append(ch)
self._dfs_collect(node.children[ch], path_chars, results)
path_chars.pop() # 回溯
============================================================
6. 通配符搜索(Word Search with Wildcard)
============================================================
def search_with_wildcard(self, word):
"""搜索支持通配符 '.' 匹配任意单个字符"""
def _search(node, word, idx):
if idx == len(word):
return node.is_end
ch = word[idx]
if ch == '.':
# 通配符:尝试所有子节点
for child in node.children.values():
if _search(child, word, idx + 1):
return True
return False
else:
if ch not in node.children:
return False
return _search(node.children[ch], word, idx + 1)
return _search(self.root, word, 0)
============================================================
7. 前缀匹配统计
============================================================
def prefix_count(self, prefix):
"""统计有多少个单词以prefix为前缀"""
node = self._find_node(prefix)
if not node:
return 0
# 统计以该节点为根的子树中所有单词的数量
def _count_words(node):
total = 1 if node.is_end else 0
for child in node.children.values():
total += _count_words(child)
return total
return _count_words(node)
def word_count(self):
"""返回Trie中单词总数"""
return self.prefix_count('')
============================================================
8. Trie vs set 性能对比
============================================================
# Trie的优势:
# - 前缀搜索(starts_with、auto_complete)是Trie的独有能力
# - 在大量字符串的公共前缀很长时更省内存
# - O(L)时间复杂度,L为单词长度,与存储的单词数量无关
#
# set/dict的优势:
# - 精确查找速度更快(哈希表O(1) vs Trie O(L))
# - 内存占用通常更小(没有额外的指针开销)
# - Python内置,使用更方便
============================================================
使用示例
============================================================
if __name__ == "__main__":
trie = Trie()
words = ["苹果", "苹果手机", "苹果电脑", "安卓", "安卓系统", "香蕉"]
for w in words:
trie.insert(w)
print("搜索'苹果':", trie.search("苹果")) # True
print("搜索'橘子':", trie.search("橘子")) # False
print("前缀'苹果':", trie.starts_with("苹果")) # True
# 自动补全测试
suggestions = trie.auto_complete("苹果")
print("'苹果'的补全建议:", suggestions) # ['苹果', '苹果手机', '苹果电脑']
# 通配符搜索(英文示例)
eng_trie = Trie()
for w in ["cat", "car", "bat", "bar", "catalog"]:
eng_trie.insert(w)
print("通配符c.t:", eng_trie.search_with_wildcard("c.t")) # True (cat)
print("通配符.og:", eng_trie.search_with_wildcard(".og")) # False (无匹配)
# 前缀统计
print("'苹果'相关的单词数:", trie.prefix_count("苹果")) # 3
# 删除测试
trie.delete("苹果")
print("删除'苹果'后搜索:", trie.search("苹果")) # False
print("删除后'苹果手机'仍然存在:", trie.search("苹果手机")) # True
