当前位置：首页 > news >正文

PythonTrie前缀树实现

news 2026/5/29 19:40:20

============================================================
Python Trie前缀树实现 — 节点定义/插入搜索/自动补全/通配符
============================================================

Trie（前缀树/字典树）是一种高效的字符串检索结构，利用字符串的公共前缀来
减少查询时间。广泛应用于自动补全、拼写检查、IP路由等场景。

============================================================
1. Trie 节点定义
============================================================

class TrieNode:
"""前缀树节点类"""
def __init__(self):
self.children = {} # 子节点字典 {字符: TrieNode}
self.is_end = False # 标记此节点是否为某个单词的结尾
self.count = 0 # 经过此节点的单词数量（用于统计）

class Trie:
"""前缀树类"""
def __init__(self):
self.root = TrieNode()

============================================================
2. 插入操作（Insert）
============================================================

def insert(self, word):
"""向Trie中插入一个单词"""
node = self.root
for ch in word:
if ch not in node.children:
node.children[ch] = TrieNode() # 创建新节点
node = node.children[ch]
node.count += 1 # 经过此节点的单词数+1
node.is_end = True # 标记单词结尾

============================================================
3. 搜索操作（Search）
============================================================

def search(self, word):
"""精确搜索：判断word是否在Trie中"""
node = self._find_node(word)
return node is not None and node.is_end

def _find_node(self, prefix):
"""找到前缀对应的最后一个节点（辅助方法）"""
node = self.root
for ch in prefix:
if ch not in node.children:
return None # 前缀不存在
node = node.children[ch]
return node

def starts_with(self, prefix):
"""判断是否存在以prefix为前缀的单词"""
return self._find_node(prefix) is not None

============================================================
4. 删除操作（Delete）
============================================================

def delete(self, word):
"""从Trie中删除一个单词"""
def _delete(node, word, depth):
"""递归删除，自底向上清理无用节点"""
if depth == len(word):
if not node.is_end: # 单词不存在
return False
node.is_end = False # 取消单词结尾标记
return len(node.children) == 0 # 如果是叶子节点则返回True
ch = word[depth]
if ch not in node.children:
return False # 单词不存在
should_delete = _delete(node.children[ch], word, depth + 1)
if should_delete:
del node.children[ch] # 删除无用节点
node.count -= 1
return len(node.children) == 0 and not node.is_end
node.count -= 1
return False
_delete(self.root, word, 0)

============================================================
5. 自动补全实现（Auto-Complete）
============================================================

def auto_complete(self, prefix):
"""返回所有以prefix开头的单词"""
node = self._find_node(prefix)
if not node:
return [] # 没有匹配前缀
results = []
self._dfs_collect(node, list(prefix), results)
return results

def _dfs_collect(self, node, path_chars, results):
"""DFS收集所有以当前节点为前缀的完整单词"""
if node.is_end:
results.append(''.join(path_chars)) # 当前路径是一个完整单词
for ch in sorted(node.children): # 按字母顺序遍历
path_chars.append(ch)
self._dfs_collect(node.children[ch], path_chars, results)
path_chars.pop() # 回溯

============================================================
6. 通配符搜索（Word Search with Wildcard）
============================================================

def search_with_wildcard(self, word):
"""搜索支持通配符 '.' 匹配任意单个字符"""
def _search(node, word, idx):
if idx == len(word):
return node.is_end
ch = word[idx]
if ch == '.':
# 通配符：尝试所有子节点
for child in node.children.values():
if _search(child, word, idx + 1):
return True
return False
else:
if ch not in node.children:
return False
return _search(node.children[ch], word, idx + 1)
return _search(self.root, word, 0)

============================================================
7. 前缀匹配统计
============================================================

def prefix_count(self, prefix):
"""统计有多少个单词以prefix为前缀"""
node = self._find_node(prefix)
if not node:
return 0
# 统计以该节点为根的子树中所有单词的数量
def _count_words(node):
total = 1 if node.is_end else 0
for child in node.children.values():
total += _count_words(child)
return total
return _count_words(node)

def word_count(self):
"""返回Trie中单词总数"""
return self.prefix_count('')

============================================================
8. Trie vs set 性能对比
============================================================
# Trie的优势：
# - 前缀搜索（starts_with、auto_complete）是Trie的独有能力
# - 在大量字符串的公共前缀很长时更省内存
# - O(L)时间复杂度，L为单词长度，与存储的单词数量无关
#
# set/dict的优势：
# - 精确查找速度更快（哈希表O(1) vs Trie O(L)）
# - 内存占用通常更小（没有额外的指针开销）
# - Python内置，使用更方便

============================================================
使用示例
============================================================
if __name__ == "__main__":
trie = Trie()
words = ["苹果", "苹果手机", "苹果电脑", "安卓", "安卓系统", "香蕉"]
for w in words:
trie.insert(w)

print("搜索'苹果':", trie.search("苹果")) # True
print("搜索'橘子':", trie.search("橘子")) # False
print("前缀'苹果':", trie.starts_with("苹果")) # True

# 自动补全测试
suggestions = trie.auto_complete("苹果")
print("'苹果'的补全建议:", suggestions) # ['苹果', '苹果手机', '苹果电脑']

# 通配符搜索（英文示例）
eng_trie = Trie()
for w in ["cat", "car", "bat", "bar", "catalog"]:
eng_trie.insert(w)
print("通配符c.t:", eng_trie.search_with_wildcard("c.t")) # True (cat)
print("通配符.og:", eng_trie.search_with_wildcard(".og")) # False (无匹配)

# 前缀统计
print("'苹果'相关的单词数:", trie.prefix_count("苹果")) # 3

# 删除测试
trie.delete("苹果")
print("删除'苹果'后搜索:", trie.search("苹果")) # False
print("删除后'苹果手机'仍然存在:", trie.search("苹果手机")) # True