from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer
# --- Settings -------------------------------------------------------------
# Location of the original (unquantized) model; replace with the path you
# downloaded it to.
model_path = "./Qwen3.6-27B"
# Directory that receives the quantized model and the tokenizer files.
quant_path = "./models/Qwen3.6-27B-AWQ-Local"
# AWQ quantization parameters, passed to model.quantize() further down.
quant_config = {
    "zero_point": True,
    "q_group_size": 128,
    "w_bit": 4,
    "version": "GEMM",
}

# --- Step 1: load the model and its tokenizer from the same path ----------
model = AutoAWQForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    trust_remote_code=True,
)

# --- Step 2: quantize ------------------------------------------------------
# NOTE: this step is extremely memory-hungry; keep a close eye on system
# resources while it runs.
print("开始本地量化,请耐心等待...")
model.quantize(tokenizer, quant_config=quant_config)

# --- Step 3: write the quantized model and the tokenizer to quant_path -----
print("保存量化模型中...")
model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)
print("本地量化完成!")