MLIR (Multi-Level Intermediate Representation) 是一个可扩展的编译器基础设施,旨在解决传统编译器中间表示的局限性。与LLVM IR不同,MLIR:
Python DSL在MLIR中主要用于:
Python DSL代码
↓
Python API调用
↓
Python Bindings (nanobind/pybind11)
↓
MLIR C API
↓
MLIR C++ Core
↓
MLIR Dialect Operations
↓
Pass Pipeline (优化和转换)
↓
Lower-level Dialects
↓
LLVM IR / 机器码
Operation是MLIR中的核心抽象单元,类似于LLVM中的指令。每个Operation包含:
示例MLIR操作(toy.transpose):
%t_tensor = "toy.transpose"(%tensor) {inplace = true} : (tensor<2x3xf64>) -> tensor<3x2xf64> loc("file.toy":12:1)
Dialect是一组相关操作、类型和属性的命名空间。MLIR通过Dialect系统实现可扩展性:
MLIR提供丰富的类型系统:
MLIR支持在同一IR中混合不同抽象层次:
// 高层Toy操作
%0 = toy.mul %a, %b : tensor<2x3xf64>
// 中层Linalg操作
%1 = linalg.matmul ins(%a, %b : tensor<2x3xf64>, tensor<3x4xf64>)
outs(%c : tensor<2x4xf64>) -> tensor<2x4xf64>
// 低层Affine循环
affine.for %i = 0 to 10 {
%val = affine.load %memref[%i] : memref<10xf32>
}
// LLVM IR级别
%2 = llvm.add %x, %y : i32
MLIR的Python绑定采用分层设计:
┌─────────────────────────────────────┐
│ Python用户代码 │
│ (DSL定义、IR构建、Pass执行) │
└─────────────────┬───────────────────┘
↓
┌─────────────────────────────────────┐
│ Python API层 │
│ mlir.ir, mlir.dialects.* │
└─────────────────┬───────────────────┘
↓
┌─────────────────────────────────────┐
│ Python Bindings (nanobind) │
│ C++封装层 │
└─────────────────┬───────────────────┘
↓
┌─────────────────────────────────────┐
│ MLIR C API │
│ mlir-c/IR.h, mlir-c/Pass.h等 │
└─────────────────┬───────────────────┘
↓
┌─────────────────────────────────────┐
│ MLIR C++ Core │
│ mlir::Operation, mlir::Type等 │
└─────────────────────────────────────┘
提供核心IR构建功能:
from mlir.ir import Context, Module, Location, InsertionPoint
from mlir.ir import IntegerType, F32Type, RankedTensorType
from mlir.ir import Attribute, IntegerAttr, FloatAttr
# 创建MLIR上下文
with Context() as ctx:
# 创建模块
module = Module.create()
# 设置位置和插入点
with InsertionPoint(module.body), Location.unknown():
# 构建操作
pass
提供各个Dialect的Python接口:
from mlir.dialects import arith, func, tensor
from mlir.dialects.linalg import matmul
# 使用dialect操作
result = arith.addi(lhs, rhs)
MLIR使用nanobind(之前使用pybind11)将C++ API暴露给Python:
// mlir/lib/Bindings/Python/IRCore.cpp
NB_MODULE(_mlir, m) {
m.doc() = "MLIR Python Native Extension";
// 定义Python类绑定
nb::class_<PyMlirContext>(m, "Context")
.def(nb::init<>())
.def("parse_module", &PyMlirContext::parseModule)
.def_prop_ro("dialects", &PyMlirContext::getDialects);
nb::class_<PyModule>(m, "Module")
.def_static("parse", &PyModule::parse)
.def_static("create", &PyModule::create);
}
Python绑定使用上下文管理器简化IR构建:
# Context作为上下文管理器
with Context() as ctx:
# 在此上下文中创建的所有IR对象都关联到ctx
module = Module.create()
# Location上下文管理器
with Location.file("source.py", line=10, col=5):
# 在此创建的操作都带有该位置信息
op = Operation.create(...)
# InsertionPoint上下文管理器
with InsertionPoint(block):
# 在此创建的操作都插入到block中
op = Operation.create(...)
from mlir.ir import Operation, Context, Location, InsertionPoint
with Context():
module = Module.create()
with InsertionPoint(module.body), Location.unknown():
# 使用通用Operation.create方法
op = Operation.create(
name="toy.constant",
results=[tensor_type],
operands=[],
attributes={"value": dense_attr},
regions=0
)
OpView提供类型安全的操作构建接口:
from mlir.dialects import arith
# 使用OpView子类的构造函数
const_op = arith.ConstantOp(result_type, value_attr)
add_op = arith.AddIOp(lhs, rhs)
Linalg OpDSL是MLIR中最典型的Python DSL,用于定义结构化线性代数操作。
from mlir.dialects.linalg.opdsl.lang import *
# 定义类型变量
T1 = TV.T1
T2 = TV.T2
# 使用装饰器定义结构化操作
@linalg_structured_op
def matmul(A=TensorDef(T1, S.M, S.K),
B=TensorDef(T2, S.K, S.N),
C=TensorDef(U, S.M, S.N, output=True)):
"""执行两个2D输入的矩阵乘法。
对内部乘法的操作数执行数值转换,将它们提升到与累加器/输出相同的数据类型。
"""
domain(D.m, D.n, D.k)
defines(Canonicalizer)
implements(ContractionOpInterface)
C[D.m, D.n] += TypeFn.cast_signed(U, A[D.m, D.k]) * TypeFn.cast_signed(U, B[D.k, D.n])
参数定义:
TensorDef、ScalarDef、IndexAttrDef
域定义:
domain(D.m, D.n, D.k)
接口和特性:
implements(...)、defines(...)
计算表达式:
归约操作(+=)和点对点操作(=)
@linalg_structured_op装饰器执行以下操作:
# mlir/python/mlir/dialects/linalg/opdsl/lang/dsl.py
def linalg_structured_op(dsl_func=None, *, op_name=None, op_class_name=None):
# 1. 提取函数签名和参数
sig = inspect.signature(dsl_func)
# 2. 创建LinalgOpDef对象
op_def = LinalgOpDef(
name=op_name or dsl_func.__name__,
cpp_class_name=op_class_name,
doc=inspect.getdoc(dsl_func)
)
# 3. 解析参数(TensorDef, ScalarDef等)
for param_name, param in sig.parameters.items():
op_def.add_operand(param_name, param.default.operand_def)
# 4. 执行DSL函数体,收集计算表达式
with bind_op_def(op_def):
dsl_func(*dsl_func_args)
# 5. 返回可调用对象
return DefinedOpCallable(op_name, op_def)
从LinalgOpDef生成LinalgOpConfig:
# 生成索引映射(indexing maps)
indexing_maps = []
for operand in op_def.operands:
affine_map = create_affine_map_from_indices(operand.indices)
indexing_maps.append(affine_map)
# 生成迭代器类型(parallel, reduction等)
iterator_types = []
for dim in op_def.domain:
if dim in reduction_dims:
iterator_types.append("reduction")
else:
iterator_types.append("parallel")
根据配置生成实际的MLIR操作:
def emit_generic_structured_op(op_config, *ins, outs, **attrs):
# 1. 准备操作数和输出
in_values = [get_op_result_or_value(i) for i in ins]
out_values = prepare_outs(outs)
# 2. 创建属性
indexing_maps_attr = ArrayAttr.get([AffineMapAttr.get(m) for m in indexing_maps])
iterator_types_attr = ArrayAttr.get([Attribute.parse(f"#linalg.iterator_type<{t}>")
for t in iterator_types])
# 3. 构建linalg.generic操作
generic_op = linalg.GenericOp(
result_types,
inputs=in_values,
outputs=out_values,
indexing_maps=indexing_maps_attr,
iterator_types=iterator_types_attr
)
# 4. 填充region body
with InsertionPoint(generic_op.region.blocks[0]):
# 生成计算表达式对应的操作
result = emit_computation_body(op_config.assignments)
linalg.YieldOp([result])
return generic_op
Toy是MLIR教程中使用的示例语言,展示了从自定义语言到MLIR的完整流程。
def multiply_transpose(a, b) {
return transpose(a) * transpose(b);
}
def main() {
var a<2, 3> = [[1, 2, 3], [4, 5, 6]];
var b<2, 3> = [1, 2, 3, 4, 5, 6];
var c = multiply_transpose(a, b);
print(c);
}
// mlir/examples/toy/Ch2/mlir/MLIRGen.cpp
class MLIRGenImpl {
public:
mlir::ModuleOp mlirGen(ModuleAST &moduleAST) {
// 创建空的MLIR模块
theModule = mlir::ModuleOp::create(builder.getUnknownLoc());
// 逐个转换函数
for (FunctionAST &f : moduleAST)
mlirGen(f);
// 验证模块
if (failed(mlir::verify(theModule))) {
theModule.emitError("module verification error");
return nullptr;
}
return theModule;
}
private:
// 转换二元表达式
mlir::Value mlirGen(BinaryExprAST &binop) {
mlir::Value lhs = mlirGen(*binop.getLHS());
mlir::Value rhs = mlirGen(*binop.getRHS());
auto location = loc(binop.loc());
switch (binop.getOp()) {
case '+':
return AddOp::create(builder, location, lhs, rhs);
case '*':
return MulOp::create(builder, location, lhs, rhs);
}
}
// 转换字面量
mlir::Value mlirGen(LiteralExprAST &lit) {
auto type = getType(lit.getDims());
std::vector<double> data;
collectData(lit, data);
auto dataType = mlir::RankedTensorType::get(lit.getDims(), builder.getF64Type());
auto dataAttribute = mlir::DenseElementsAttr::get(dataType, data);
return ConstantOp::create(builder, loc(lit.loc()), type, dataAttribute);
}
};
生成的MLIR IR:
module {
toy.func @multiply_transpose(%arg0: tensor<*xf64>, %arg1: tensor<*xf64>) -> tensor<*xf64> {
%0 = toy.transpose(%arg0 : tensor<*xf64>) to tensor<*xf64>
%1 = toy.transpose(%arg1 : tensor<*xf64>) to tensor<*xf64>
%2 = toy.mul %0, %1 : tensor<*xf64>
toy.return %2 : tensor<*xf64>
}
toy.func @main() {
%0 = toy.constant dense<[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]> : tensor<2x3xf64>
%1 = toy.constant dense<[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]> : tensor<6xf64>
%2 = toy.reshape(%1 : tensor<6xf64>) to tensor<2x3xf64>
%3 = toy.generic_call @multiply_transpose(%0, %2) : (tensor<2x3xf64>, tensor<2x3xf64>) -> tensor<*xf64>
toy.print %3 : tensor<*xf64>
toy.return
}
}
TableGen是LLVM项目的代码生成工具,用于从声明式描述生成C++代码。
// 定义类(可模板化)
class MyClass<int width> {
int bitWidth = width;
string name;
}
// 定义记录(实例)
def MyRecord : MyClass<32> {
let name = "example";
}
// DAG类型(有向无环图)
dag myDag = (operator arg0, arg1:$name, arg2);
ODS是基于TableGen的操作定义框架,用于声明式地定义MLIR操作。
// include/toy/Ops.td
def Toy_Dialect : Dialect {
let name = "toy";
let summary = "A high-level dialect for analyzing and optimizing the Toy language";
let description = [{
The Toy language is a tensor-based language that allows you to define
functions, perform some math computation, and print results.
}];
let cppNamespace = "toy";
}
class Toy_Op<string mnemonic, list<Trait> traits = []> :
Op<Toy_Dialect, mnemonic, traits>;
def ConstantOp : Toy_Op<"constant", [Pure]> {
let summary = "constant operation";
let description = [{
Constant operation turns a literal into an SSA value.
Example:
%0 = toy.constant dense<[[1.0, 2.0, 3.0]]> : tensor<1x3xf64>
}];
// 参数定义
let arguments = (ins F64ElementsAttr:$value);
// 结果定义
let results = (outs F64Tensor:$output);
// 自定义构建方法
let builders = [
OpBuilder<(ins "DenseElementsAttr":$value), [{
build(builder, result, value.getType(), value);
}]>,
OpBuilder<(ins "double":$value)>
];
// 自定义汇编格式
let assemblyFormat = "$value attr-dict `:` type($output)";
// 启用验证器
let hasVerifier = 1;
}
def TransposeOp : Toy_Op<"transpose", [Pure]> {
let summary = "transpose operation";
let arguments = (ins F64Tensor:$input);
let results = (outs F64Tensor:$output);
let assemblyFormat = "`(` $input `:` type($input) `)` attr-dict `to` type($output)";
// 启用规范化
let hasCanonicalizer = 1;
}
mlir-tblgen -gen-op-decls Ops.td -I ${mlir_src}/include/
生成的C++头文件(简化版):
// 自动生成的ConstantOp类
class ConstantOp : public Op<ConstantOp, OpTrait::ZeroOperands,
OpTrait::OneResult, OpTrait::Pure> {
public:
using Op::Op;
static constexpr StringLiteral getOperationName() {
return StringLiteral("toy.constant");
}
// 自动生成的访问器
DenseElementsAttr getValue() { return (*this)->getAttr("value").cast<DenseElementsAttr>(); }
// 自动生成的构建方法
static void build(OpBuilder &builder, OperationState &state, DenseElementsAttr value);
static void build(OpBuilder &builder, OperationState &state, double value);
// 验证方法
LogicalResult verify();
// 解析和打印方法
static ParseResult parse(OpAsmParser &parser, OperationState &result);
void print(OpAsmPrinter &p);
};
mlir-tblgen -gen-python-op-bindings -bind-dialect=toy Ops.td
生成的Python代码(简化版):
# _toy_ops_gen.py (自动生成)
from ._ods_common import _cext as _ods_cext
_ods_ir = _ods_cext.ir
@_ods_cext.register_operation(_Dialect)
class ConstantOp(_ods_ir.OpView):
OPERATION_NAME = "toy.constant"
def __init__(self, value, *, loc=None, ip=None):
# 调用底层C++ API构建操作
...
@property
def value(self):
return self.operation.attributes["value"]
@value.setter
def value(self, value):
self.operation.attributes["value"] = value
# 声明TableGen目标
set(LLVM_TARGET_DEFINITIONS Ops.td)
mlir_tablegen(Ops.h.inc -gen-op-decls)
mlir_tablegen(Ops.cpp.inc -gen-op-defs)
mlir_tablegen(Dialect.h.inc -gen-dialect-decls)
mlir_tablegen(Dialect.cpp.inc -gen-dialect-defs)
add_public_tablegen_target(ToyOpsIncGen)
# 声明Python绑定
declare_mlir_dialect_python_bindings(
ADD_TO_PARENT ToyPythonSources
ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/python"
TD_FILE python/ToyOps.td
SOURCES python/toy.py
DIALECT_NAME toy
)
// toy/Dialect.h
class ToyDialect : public mlir::Dialect {
public:
explicit ToyDialect(mlir::MLIRContext *ctx);
static llvm::StringRef getDialectNamespace() { return "toy"; }
void initialize();
// 类型解析
mlir::Type parseType(mlir::DialectAsmParser &parser) const override;
void printType(mlir::Type type, mlir::DialectAsmPrinter &os) const override;
};
// toy/Dialect.cpp
void ToyDialect::initialize() {
// 注册操作
addOperations<
ConstantOp,
AddOp,
MulOp,
TransposeOp,
PrintOp,
ReturnOp
>();
// 注册类型
addTypes<StructType>();
// 注册接口
addInterfaces<ToyInlinerInterface>();
}
from mlir.ir import Context
from mlir.dialects import toy
# 创建上下文并加载dialect
with Context() as ctx:
# Dialect在首次使用时自动加载
# 或者显式加载
ctx.load_dialect("toy")
# 现在可以使用toy dialect的操作
const_op = toy.ConstantOp(...)
# mlir/python/mlir/dialects/toy.py
from ._toy_ops_gen import * # 导入自动生成的操作类
# 扩展生成的操作类
from ._toy_ops_gen import _Dialect, ConstantOp as _ConstantOp
from ._ods_common import _cext
@_cext.register_operation(_Dialect, replace=True)
class ConstantOp(_ConstantOp):
"""扩展的ConstantOp,提供更友好的构造函数"""
def __init__(self, result_type, value, *, loc=None, ip=None):
if isinstance(value, (int, float)):
# 从Python值创建属性
attr = create_dense_attr(result_type, value)
super().__init__(attr, loc=loc, ip=ip)
else:
super().__init__(value, loc=loc, ip=ip)
with Context() as ctx:
module = Module.create()
with InsertionPoint(module.body), Location.unknown():
# 1. 创建操作(detached状态)
op = Operation.create("toy.constant", ...)
# 2. 操作被插入到block中(attached状态)
# (通过InsertionPoint自动完成)
# 3. 操作可以被遍历和修改
for operand in op.operands:
print(operand.type)
# 4. 操作可以被替换或删除
# rewriter.replaceOp(op, new_values)
# rewriter.eraseOp(op)
┌──────────────────────────────────────────────────────────────┐
│ 1. 源语言解析 │
│ Python DSL / Toy源码 → AST │
└────────────────────────┬─────────────────────────────────────┘
↓
┌──────────────────────────────────────────────────────────────┐
│ 2. MLIR IR生成 │
│ AST → High-level Dialect IR (Toy, Linalg等) │
│ - 使用OpBuilder构建操作 │
│ - 设置类型、属性、位置信息 │
└────────────────────────┬─────────────────────────────────────┘
↓
┌──────────────────────────────────────────────────────────────┐
│ 3. 高层优化 │
│ - Canonicalization(规范化) │
│ - CSE(公共子表达式消除) │
│ - Inlining(内联) │
│ - Shape Inference(形状推断) │
└────────────────────────┬─────────────────────────────────────┘
↓
┌──────────────────────────────────────────────────────────────┐
│ 4. Dialect转换(Lowering) │
│ High-level Dialect → Mid-level Dialect │
│ - Toy → Affine + Arith + MemRef │
│ - Linalg → Affine/SCF │
└────────────────────────┬─────────────────────────────────────┘
↓
┌──────────────────────────────────────────────────────────────┐
│ 5. 中层优化 │
│ - Affine loop fusion/tiling │
│ - Memory optimization │
│ - Vectorization │
└────────────────────────┬─────────────────────────────────────┘
↓
┌──────────────────────────────────────────────────────────────┐
│ 6. 最终Lowering │
│ Mid-level Dialect → LLVM Dialect │
│ - Affine → SCF → CF → LLVM │
│ - MemRef → LLVM │
└────────────────────────┬─────────────────────────────────────┘
↓
┌──────────────────────────────────────────────────────────────┐
│ 7. LLVM IR生成 │
│ LLVM Dialect → LLVM IR │
└────────────────────────┬─────────────────────────────────────┘
↓
┌──────────────────────────────────────────────────────────────┐
│ 8. 代码生成 │
│ LLVM IR → 机器码 │
└──────────────────────────────────────────────────────────────┘
from mlir.ir import Context, Module, Location, InsertionPoint
from mlir.ir import F64Type, RankedTensorType
from mlir.dialects import toy, func, arith
from mlir.passmanager import PassManager
def build_toy_module():
with Context() as ctx:
# 加载必要的dialect
ctx.load_dialect("toy")
ctx.load_dialect("func")
module = Module.create()
with InsertionPoint(module.body):
# 定义函数类型
tensor_type = RankedTensorType.get([2, 3], F64Type.get())
func_type = FunctionType.get([tensor_type], [tensor_type])
# 创建函数
with Location.file("example.toy", 1, 1):
func_op = func.FuncOp(name="transpose_twice", type=func_type)
# 创建函数体
entry_block = func_op.add_entry_block()
with InsertionPoint(entry_block):
arg = entry_block.arguments[0]
# 第一次转置
t1 = toy.TransposeOp(arg)
# 第二次转置
t2 = toy.TransposeOp(t1)
# 返回
func.ReturnOp([t2])
return module
def optimize_module(module):
# 创建Pass管理器
pm = PassManager.parse("builtin.module(toy-canonicalize,cse)")
# 运行优化
pm.run(module.operation)
return module
# 使用
module = build_toy_module()
print("原始IR:")
print(module)
optimized = optimize_module(module)
print("\n优化后IR:")
print(optimized)
void ToyToAffineLoweringPass::runOnOperation() {
// 1. 定义转换目标
mlir::ConversionTarget target(getContext());
// 2. 声明合法的dialect
target.addLegalDialect<affine::AffineDialect,
arith::ArithDialect,
func::FuncDialect,
memref::MemRefDialect>();
// 3. 声明非法的dialect
target.addIllegalDialect<ToyDialect>();
// 4. 特定操作的动态合法性
target.addDynamicallyLegalOp<toy::PrintOp>([](toy::PrintOp op) {
// 只有当操作数都不是TensorType时才合法
return llvm::none_of(op->getOperandTypes(),
llvm::IsaPred<TensorType>);
});
}
// 将toy.transpose转换为affine循环
struct TransposeOpLowering : public OpConversionPattern<toy::TransposeOp> {
using OpConversionPattern<toy::TransposeOp>::OpConversionPattern;
LogicalResult matchAndRewrite(
toy::TransposeOp op,
OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const final {
auto loc = op.getLoc();
auto input = adaptor.getInput(); // 已转换的操作数
// 获取输入形状
auto inputType = input.getType().cast<MemRefType>();
auto shape = inputType.getShape();
// 分配输出buffer
auto outputType = MemRefType::get({shape[1], shape[0]},
inputType.getElementType());
auto alloc = rewriter.create<memref::AllocOp>(loc, outputType);
// 生成嵌套的affine循环
SmallVector<int64_t, 2> lowerBounds(2, 0);
SmallVector<int64_t, 2> steps(2, 1);
affine::buildAffineLoopNest(
rewriter, loc, lowerBounds, shape, steps,
[&](OpBuilder &builder, Location loc, ValueRange ivs) {
// 循环体:output[j][i] = input[i][j]
auto loadVal = builder.create<affine::AffineLoadOp>(
loc, input, ValueRange{ivs[0], ivs[1]});
builder.create<affine::AffineStoreOp>(
loc, loadVal, alloc, ValueRange{ivs[1], ivs[0]});
});
// 替换原操作
rewriter.replaceOp(op, alloc);
return success();
}
};
void ToyToAffineLoweringPass::runOnOperation() {
ConversionTarget target(getContext());
// ... 设置target ...
// 添加转换模式
RewritePatternSet patterns(&getContext());
patterns.add<TransposeOpLowering,
MulOpLowering,
AddOpLowering>(&getContext());
// 执行部分转换
if (failed(applyPartialConversion(getOperation(), target, patterns)))
signalPassFailure();
}
最常见的Pass类型,对特定操作进行转换:
struct MyPass : public PassWrapper<MyPass, OperationPass<>> {
void runOnOperation() override {
Operation *op = getOperation();
// 处理操作
}
};
限定只在特定操作类型上运行:
struct MyFunctionPass : public PassWrapper<MyFunctionPass,
OperationPass<func::FuncOp>> {
void runOnOperation() override {
func::FuncOp func = getOperation();
// 只处理函数操作
}
};
基于接口的Pass:
struct MyInterfacePass : public PassWrapper<MyInterfacePass,
InterfacePass<FunctionOpInterface>> {
void runOnOperation() override {
FunctionOpInterface func = getOperation();
// 处理实现了FunctionOpInterface的操作
}
};
def ToyInliner : Pass<"toy-inline", "mlir::ModuleOp"> {
let summary = "Inline toy function calls";
let description = [{
This pass inlines toy function calls into their callers.
}];
let constructor = "mlir::toy::createInlinerPass()";
let dependentDialects = ["toy::ToyDialect"];
let options = [
Option<"maxInlineDepth", "max-depth", "unsigned", /*default=*/"10",
"Maximum inlining depth">
];
}
生成的代码:
std::unique_ptr<Pass> createToyInlinerPass() {
return std::make_unique<ToyInlinerPass>();
}
std::unique_ptr<Pass> createToyInlinerPass(unsigned maxInlineDepth) {
return std::make_unique<ToyInlinerPass>(maxInlineDepth);
}
#include "mlir/Pass/PassManager.h"
void runOptimizationPipeline(mlir::ModuleOp module) {
mlir::PassManager pm(module.getContext());
// 添加模块级别的pass
pm.addPass(mlir::createInlinerPass());
pm.addPass(mlir::createCSEPass());
// 添加函数级别的pass
pm.addNestedPass<func::FuncOp>(mlir::createCanonicalizerPass());
pm.addNestedPass<func::FuncOp>(toy::createShapeInferencePass());
// 运行pipeline
if (failed(pm.run(module))) {
llvm::errs() << "Pass pipeline failed\n";
}
}
from mlir.passmanager import PassManager
def run_optimization(module):
# 方式1:使用字符串解析pipeline
pm = PassManager.parse("builtin.module(inline,cse,canonicalize)")
pm.run(module.operation)
# 方式2:程序化构建pipeline
pm = PassManager("builtin.module")
pm.add("inline")
pm.add("cse")
# 添加嵌套pass
func_pm = pm.nest("func.func")
func_pm.add("canonicalize")
func_pm.add("toy-shape-inference")
# 启用IR打印(调试用)
pm.enable_ir_printing()
# 运行
pm.run(module.operation)
return module
// 消除冗余的transpose操作
struct SimplifyRedundantTranspose : public OpRewritePattern<TransposeOp> {
SimplifyRedundantTranspose(MLIRContext *context)
: OpRewritePattern<TransposeOp>(context, /*benefit=*/1) {}
LogicalResult matchAndRewrite(TransposeOp op,
PatternRewriter &rewriter) const override {
// 匹配模式:transpose(transpose(x))
Value transposeInput = op.getOperand();
TransposeOp transposeInputOp = transposeInput.getDefiningOp<TransposeOp>();
if (!transposeInputOp)
return failure();
// 重写:替换为x
rewriter.replaceOp(op, {transposeInputOp.getOperand()});
return success();
}
};
// 注册到Canonicalizer
void TransposeOp::getCanonicalizationPatterns(
RewritePatternSet &results, MLIRContext *context) {
results.add<SimplifyRedundantTranspose>(context);
}
from mlir.rewrite import RewritePatternSet, apply_patterns_and_fold_greedily
from mlir.dialects import arith
def addi_to_muli_pattern(op, rewriter):
"""将arith.addi重写为arith.muli"""
with rewriter.ip:
new_op = arith.muli(op.lhs, op.rhs, loc=op.location)
rewriter.replace_op(op, new_op)
def optimize_with_patterns(module):
# 创建pattern set
patterns = RewritePatternSet()
patterns.add(arith.AddIOp, addi_to_muli_pattern)
# 冻结并应用
frozen = patterns.freeze()
apply_patterns_and_fold_greedily(module, frozen)
使用TableGen定义重写规则:
// ToyCombine.td
include "toy/Ops.td"
// 模式1:消除冗余reshape
def ReshapeReshapeOptPattern : Pat<
(ReshapeOp(ReshapeOp $arg)),
(ReshapeOp $arg)
>;
// 模式2:消除类型相同的reshape
def TypesAreIdentical : Constraint<CPred<"$0.getType() == $1.getType()">>;
def RedundantReshapeOptPattern : Pat<
(ReshapeOp:$res $arg),
(replaceWithValue $arg),
[(TypesAreIdentical $res, $arg)]
>;
// 模式3:常量折叠
def FoldConstantReshape : Pat<
(ReshapeOp (ConstantOp $value)),
(ConstantOp (ReshapeConstant $value))
>;
生成C++代码:
mlir-tblgen -gen-rewriters ToyCombine.td -I ${mlir_src}/include/
MLIR支持渐进式lowering,允许在同一IR中混合不同抽象层次:
// 部分lowering后的IR
func.func @example(%arg0: tensor<10xf32>) -> memref<10xf32> {
// 高层操作(尚未lowering)
%0 = toy.print %arg0 : tensor<10xf32>
// 已lowering到affine
%alloc = memref.alloc() : memref<10xf32>
affine.for %i = 0 to 10 {
%val = tensor.extract %0[%i] : tensor<10xf32>
affine.store %val, %alloc[%i] : memref<10xf32>
}
return %alloc : memref<10xf32>
}
Lowering通常涉及类型转换:
class ToyToAffineTypeConverter : public TypeConverter {
public:
ToyToAffineTypeConverter() {
// Tensor → MemRef转换
addConversion([](TensorType type) {
return MemRefType::get(type.getShape(), type.getElementType());
});
// 其他类型保持不变
addConversion([](Type type) { return type; });
}
};
// toy.transpose → affine loops
struct TransposeOpLowering : public OpConversionPattern<toy::TransposeOp> {
LogicalResult matchAndRewrite(...) const final {
// 生成:
// affine.for %i = 0 to M {
// affine.for %j = 0 to N {
// %v = affine.load %input[%i, %j]
// affine.store %v, %output[%j, %i]
// }
// }
}
};
转换前:
%1 = toy.transpose(%0 : tensor<2x3xf64>) to tensor<3x2xf64>
转换后:
%alloc = memref.alloc() : memref<3x2xf64>
affine.for %i = 0 to 2 {
affine.for %j = 0 to 3 {
%val = affine.load %input[%i, %j] : memref<2x3xf64>
affine.store %val, %alloc[%j, %i] : memref<3x2xf64>
}
}
// affine.for → scf.for
affine.for %i = 0 to 10 {
%val = affine.load %mem[%i]
}
// ↓ 转换为
%c0 = arith.constant 0 : index
%c10 = arith.constant 10 : index
%c1 = arith.constant 1 : index
scf.for %i = %c0 to %c10 step %c1 {
%val = memref.load %mem[%i] : memref<10xf32>
}
// scf.for → cf (control flow)
scf.for %i = %lb to %ub step %step {
...
}
// ↓ 转换为
cf.br ^header(%lb : index)
^header(%i: index):
%cond = arith.cmpi slt, %i, %ub : index
cf.cond_br %cond, ^body, ^exit
^body:
...
%next = arith.addi %i, %step : index
cf.br ^header(%next : index)
^exit:
...
// ↓ 最终转换为LLVM dialect
llvm.br ^header(%lb : i64)
^header(%i: i64):
%cond = llvm.icmp "slt" %i, %ub : i64
llvm.cond_br %cond, ^body, ^exit
^body:
...
%next = llvm.add %i, %step : i64
llvm.br ^header(%next : i64)
^exit:
...
from mlir.dialects import toy, affine, arith, memref, func
from mlir.ir import Module, Context
from mlir.passmanager import PassManager
def lower_toy_to_affine(module_str):
with Context() as ctx:
# 解析包含Toy操作的模块
module = Module.parse(module_str)
# 创建lowering pipeline
pm = PassManager.parse(
"builtin.module("
" func.func("
" toy-shape-inference,"
" toy-inline"
" ),"
" toy-lower-to-affine"
")"
)
# 运行lowering
pm.run(module.operation)
return module
# 使用
toy_ir = """
module {
toy.func @main() {
%0 = toy.constant dense<[[1.0, 2.0]]> : tensor<1x2xf64>
%1 = toy.transpose(%0) : tensor<1x2xf64> to tensor<2x1xf64>
toy.print %1 : tensor<2x1xf64>
toy.return
}
}
"""
lowered = lower_toy_to_affine(toy_ir)
print(lowered)
// MyDialect.td
include "mlir/IR/OpBase.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
def MyDialect : Dialect {
let name = "mydialect";
let summary = "My custom dialect";
let cppNamespace = "::mydialect";
}
class MyDialect_Op<string mnemonic, list<Trait> traits = []> :
Op<MyDialect, mnemonic, traits>;
def AddOp : MyDialect_Op<"add", [Pure, Commutative]> {
let summary = "element-wise addition";
let arguments = (ins AnyTensor:$lhs, AnyTensor:$rhs);
let results = (outs AnyTensor:$result);
let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($result)";
}
def ConstantOp : MyDialect_Op<"constant", [Pure]> {
let summary = "constant value";
let arguments = (ins AnyAttr:$value);
let results = (outs AnyType:$result);
let builders = [
OpBuilder<(ins "Attribute":$value)>
];
}
# 生成C++代码
mlir-tblgen -gen-op-decls MyDialect.td -o MyDialect.h.inc
mlir-tblgen -gen-op-defs MyDialect.td -o MyDialect.cpp.inc
mlir-tblgen -gen-dialect-decls MyDialect.td -o MyDialectDialect.h.inc
mlir-tblgen -gen-dialect-defs MyDialect.td -o MyDialectDialect.cpp.inc
# 生成Python绑定
mlir-tblgen -gen-python-op-bindings -bind-dialect=mydialect MyDialect.td -o _mydialect_ops_gen.py
// MyDialect.h
#include "mlir/IR/Dialect.h"
#include "mlir/IR/OpDefinition.h"
namespace mydialect {
class MyDialect : public mlir::Dialect {
public:
explicit MyDialect(mlir::MLIRContext *context);
static constexpr StringLiteral getDialectNamespace() { return "mydialect"; }
void initialize();
};
} // namespace mydialect
// 包含自动生成的代码
#define GET_OP_CLASSES
#include "MyDialect.h.inc"
// MyDialect.cpp
#include "MyDialect.h"
void MyDialect::initialize() {
addOperations<
#define GET_OP_LIST
#include "MyDialect.cpp.inc"
>();
}
# python/mydialect.py
from ._mydialect_ops_gen import *
from ._ods_common import _cext
# 扩展生成的ConstantOp
@_cext.register_operation(_Dialect, replace=True)
class ConstantOp(_ConstantOp):
def __init__(self, result_type, value, *, loc=None, ip=None):
if isinstance(value, (int, float)):
# 从Python值创建MLIR属性
if isinstance(value, int):
attr = IntegerAttr.get(result_type, value)
else:
attr = FloatAttr.get(result_type, value)
super().__init__(attr, loc=loc, ip=ip)
else:
super().__init__(value, loc=loc, ip=ip)
from mlir.ir import Context, Module, Location, InsertionPoint
from mlir.ir import F32Type, RankedTensorType
from mlir.dialects import mydialect
def build_example():
with Context() as ctx:
ctx.load_dialect("mydialect")
module = Module.create()
with InsertionPoint(module.body), Location.unknown():
tensor_type = RankedTensorType.get([10], F32Type.get())
# 创建常量
c1 = mydialect.ConstantOp(tensor_type, 1.0)
c2 = mydialect.ConstantOp(tensor_type, 2.0)
# 执行加法
result = mydialect.AddOp(c1, c2)
return module
module = build_example()
print(module)
输出:
module {
%0 = mydialect.constant 1.0 : tensor<10xf32>
%1 = mydialect.constant 2.0 : tensor<10xf32>
%2 = mydialect.add %0, %1 : tensor<10xf32>
}
from mlir.ir import Context, Module
from mlir.passmanager import PassManager
def compile_toy_to_llvm(toy_source):
"""
完整的Toy编译流程:
Toy → Affine → SCF → CF → LLVM → 机器码
"""
with Context() as ctx:
# 1. 解析Toy源码到MLIR
module = parse_toy_source(toy_source)
print("=== 阶段1: Toy IR ===")
print(module)
# 2. 高层优化
pm = PassManager.parse(
"builtin.module("
" func.func(toy-inline),"
" func.func(toy-shape-inference),"
" func.func(canonicalize),"
" cse"
")"
)
pm.run(module.operation)
print("\n=== 阶段2: 优化后的Toy IR ===")
print(module)
# 3. Lowering到Affine
pm = PassManager.parse("builtin.module(toy-lower-to-affine)")
pm.run(module.operation)
print("\n=== 阶段3: Affine IR ===")
print(module)
# 4. Affine优化
pm = PassManager.parse(
"builtin.module("
" func.func(affine-loop-fusion),"
" func.func(affine-scalrep),"
" func.func(lower-affine)"
")"
)
pm.run(module.operation)
print("\n=== 阶段4: SCF IR ===")
print(module)
# 5. Lowering到LLVM
pm = PassManager.parse(
"builtin.module("
" func.func(convert-scf-to-cf),"
" convert-func-to-llvm,"
" convert-cf-to-llvm,"
" convert-arith-to-llvm,"
" convert-memref-to-llvm,"
" reconcile-unrealized-casts"
")"
)
pm.run(module.operation)
print("\n=== 阶段5: LLVM Dialect IR ===")
print(module)
# 6. 转换到LLVM IR
from mlir.execution_engine import ExecutionEngine
# 注册LLVM翻译
from mlir._mlir_libs._mlirRegisterEverything import register_llvm_translations
register_llvm_translations(ctx)
# 创建执行引擎
engine = ExecutionEngine(module)
# 7. JIT编译并执行
result = engine.invoke("main")
return result
from mlir.dialects.linalg.opdsl.lang import *
@linalg_structured_op
def my_conv2d(
input=TensorDef(T1, S.N, S.H, S.W, S.C),
kernel=TensorDef(T2, S.KH, S.KW, S.C, S.F),
output=TensorDef(U, S.N, S.OH, S.OW, S.F, output=True),
strides=IndexAttrDef(S.SH, S.SW, default=[1, 1]),
dilations=IndexAttrDef(S.DH, S.DW, default=[1, 1])
):
"""2D卷积操作"""
domain(D.n, D.oh, D.ow, D.f, D.kh, D.kw, D.c)
output[D.n, D.oh, D.ow, D.f] += (
TypeFn.cast_signed(U, input[D.n,
D.oh * S.SH + D.kh * S.DH,
D.ow * S.SW + D.kw * S.DW,
D.c]) *
TypeFn.cast_signed(U, kernel[D.kh, D.kw, D.c, D.f])
)
from mlir.ir import Context, Module, Location, InsertionPoint
from mlir.ir import F32Type, RankedTensorType
from mlir.dialects import tensor
def build_conv2d_example():
with Context() as ctx:
module = Module.create()
with InsertionPoint(module.body), Location.unknown():
# 定义类型
input_type = RankedTensorType.get([1, 28, 28, 3], F32Type.get())
kernel_type = RankedTensorType.get([3, 3, 3, 64], F32Type.get())
output_type = RankedTensorType.get([1, 26, 26, 64], F32Type.get())
# 创建输入
input_tensor = tensor.empty(input_type)
kernel_tensor = tensor.empty(kernel_type)
output_tensor = tensor.empty(output_type)
# 调用OpDSL定义的操作
result = my_conv2d(
input_tensor,
kernel_tensor,
outs=[output_tensor],
strides=[1, 1],
dilations=[1, 1]
)
return module
module = build_conv2d_example()
print(module)
生成的IR:
module {
%input = tensor.empty() : tensor<1x28x28x3xf32>
%kernel = tensor.empty() : tensor<3x3x3x64xf32>
%output = tensor.empty() : tensor<1x26x26x64xf32>
%result = linalg.generic {
indexing_maps = [
affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>,
affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)>,
affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
],
iterator_types = ["parallel", "parallel", "parallel", "parallel",
"reduction", "reduction", "reduction"]
} ins(%input, %kernel : tensor<1x28x28x3xf32>, tensor<3x3x3x64xf32>)
outs(%output : tensor<1x26x26x64xf32>) {
^bb0(%in: f32, %k: f32, %out: f32):
%mul = arith.mulf %in, %k : f32
%add = arith.addf %out, %mul : f32
linalg.yield %add : f32
} -> tensor<1x26x26x64xf32>
}
from mlir.ir import Context, Module, Location, InsertionPoint
from mlir.ir import F32Type, RankedTensorType, FunctionType
from mlir.dialects import func, arith, tensor, linalg
from mlir.passmanager import PassManager
from mlir.execution_engine import ExecutionEngine
import numpy as np
def create_matmul_module():
"""创建矩阵乘法模块"""
with Context() as ctx:
module = Module.create()
with InsertionPoint(module.body):
# 定义函数类型
f32 = F32Type.get()
matrix_type = RankedTensorType.get([4, 4], f32)
func_type = FunctionType.get(
[matrix_type, matrix_type],
[matrix_type]
)
# 创建函数
with Location.file("matmul.py", 1, 1):
func_op = func.FuncOp(name="matmul", type=func_type)
entry = func_op.add_entry_block()
with InsertionPoint(entry):
lhs, rhs = entry.arguments
# 创建输出tensor
output = tensor.empty(matrix_type)
# 创建matmul操作
result = linalg.matmul(lhs, rhs, outs=[output])
# 返回
func.ReturnOp([result])
return module
def optimize_and_lower(module):
"""优化并lowering到LLVM"""
pm = PassManager.parse(
"builtin.module("
" func.func(linalg-generalize-named-ops),"
" func.func(linalg-fuse-elementwise-ops),"
" one-shot-bufferize{bufferize-function-boundaries},"
" func.func(convert-linalg-to-loops),"
" func.func(lower-affine),"
" convert-scf-to-cf,"
" convert-func-to-llvm,"
" convert-cf-to-llvm,"
" convert-arith-to-llvm,"
" convert-memref-to-llvm,"
" reconcile-unrealized-casts"
")"
)
pm.run(module.operation)
return module
def compile_and_run():
# 1. 创建IR
module = create_matmul_module()
print("=== 原始IR ===")
print(module)
# 2. 优化和lowering
module = optimize_and_lower(module)
print("\n=== Lowering后的IR ===")
print(module)
# 3. JIT编译
engine = ExecutionEngine(module)
# 4. 准备输入数据
a = np.array([[1, 2, 3, 4]] * 4, dtype=np.float32)
b = np.array([[1], [2], [3], [4]] * 4, dtype=np.float32).T
# 5. 执行
result = engine.invoke("matmul", a, b)
print("\n=== 执行结果 ===")
print(result)
# 运行
compile_and_run()
from mlir.ir import Context, Module, OpView
from mlir.passmanager import PassManager
def my_optimization_pass(op, pass_):
    """Custom Python pass: constant-fold ``arith.addi`` operations.

    Args:
        op: Root operation whose regions are walked.
        pass_: Pass handle supplied by the pass manager (unused here).
    """
    # Walk the operation tree: regions -> blocks -> nested operations.
    for region in op.regions:
        for block in region:
            for nested_op in block:
                if nested_op.name == "arith.addi":
                    # Fold when both operands are constants.
                    # NOTE(review): is_constant / fold_constant_add are
                    # defined elsewhere — not visible in this chunk.
                    if is_constant(nested_op.operands[0]) and \
                            is_constant(nested_op.operands[1]):
                        fold_constant_add(nested_op)
def run_custom_pass(module):
    """Run a pipeline mixing C++-defined and Python-defined passes.

    Args:
        module: Module to transform in place.

    Returns:
        The same module after the pipeline has run.
    """
    pm = PassManager("builtin.module")
    # C++-implemented pass.
    pm.add("cse")
    # Python-implemented pass.
    # NOTE(review): upstream PassManager.add takes pipeline strings;
    # passing a Python callable may require a custom binding — confirm.
    pm.add(my_optimization_pass)
    # Further cleanup.
    pm.add("canonicalize")
    pm.run(module.operation)
    return module
from mlir.rewrite import RewritePatternSet, apply_patterns_and_fold_greedily
from mlir.dialects import arith
from mlir.ir import IntegerAttr, IntegerType
def constant_fold_addi(op, rewriter):
    """Constant folding pattern: addi(const1, const2) -> const3.

    Args:
        op: The ``arith.addi`` operation to try to fold.
        rewriter: Rewriter used to create and replace operations.

    Returns:
        None — both when the pattern fails to match and after rewriting.
    """
    lhs_op = op.operands[0].owner
    rhs_op = op.operands[1].owner
    # Both operands must be produced by arith.constant.
    if lhs_op.name != "arith.constant" or rhs_op.name != "arith.constant":
        return  # no match — None signals failure
    # Extract the constant values.
    lhs_val = lhs_op.attributes["value"].value
    rhs_val = rhs_op.attributes["value"].value
    # Compute the folded result.
    result_val = lhs_val + rhs_val
    # Create the new constant at the rewriter's insertion point.
    with rewriter.ip:
        result_type = op.results[0].type
        new_const = arith.ConstantOp(result_type, result_val, loc=op.location)
    # Replace the original addi with the folded constant.
    rewriter.replace_op(op, new_const)
def optimize_module(module):
    """Greedily apply the addi constant-folding pattern to the module.

    Args:
        module: Module to rewrite in place.

    Returns:
        The same module after pattern application.
    """
    # Register the Python pattern against arith.AddIOp.
    patterns = RewritePatternSet()
    patterns.add(arith.AddIOp, constant_fold_addi)
    # Freeze and apply until fixpoint.
    frozen = patterns.freeze()
    apply_patterns_and_fold_greedily(module, frozen)
    return module
MLIR使用SSA形式,每个值只能被定义一次:
func.func @ssa_example(%arg0: i32) -> i32 {
%0 = arith.constant 10 : i32
%1 = arith.addi %arg0, %0 : i32 // definition of %1
%2 = arith.muli %1, %1 : i32 // use of %1
return %2 : i32
}
在Python中:
with InsertionPoint(block):
    c10 = arith.ConstantOp(i32_type, 10)   # returns an OpResult
    sum_val = arith.AddIOp(arg0, c10)      # uses c10's result
    prod = arith.MulIOp(sum_val, sum_val)  # uses sum_val's result
from mlir.ir import *
# Integer attribute
int_attr = IntegerAttr.get(IntegerType.get_signless(32), 42)
# Float attribute
float_attr = FloatAttr.get(F32Type.get(), 3.14)
# String attribute
str_attr = StringAttr.get("hello")
# Array attribute
array_attr = ArrayAttr.get([int_attr, float_attr])
# Dictionary attribute
dict_attr = DictAttr.get({"key1": int_attr, "key2": str_attr})
# Dense elements attribute (built from a numpy array)
import numpy as np
data = np.array([[1, 2], [3, 4]], dtype=np.float32)
tensor_type = RankedTensorType.get([2, 2], F32Type.get())
dense_attr = DenseElementsAttr.get(data, type=tensor_type)
from mlir.ir import register_attribute_builder, IntegerAttr, IntegerType


@register_attribute_builder("MyI32Attr")
def build_my_i32_attr(value: int, context):
    """Build a signless-i32 IntegerAttr from a plain Python int."""
    return IntegerAttr.get(IntegerType.get_signless(32, context), value)


# Usage
with Context():
    # A plain Python int is now auto-converted to MyI32Attr.
    op = MyOp(42)
// Declare type inference in ODS
def MyOp : MyDialect_Op<"my_op", [DeclareOpInterfaceMethods<InferTypeOpInterface>]> {
let arguments = (ins AnyTensor:$input);
let results = (outs AnyTensor:$output);
}
// Implement the type-inference hook
LogicalResult MyOp::inferReturnTypes(
MLIRContext *context,
std::optional<Location> location,
ValueRange operands,
DictionaryAttr attributes,
OpaqueProperties properties,
RegionRange regions,
SmallVectorImpl<Type> &inferredReturnTypes) {
// Infer the output type from the input
auto inputType = operands[0].getType().cast<TensorType>();
inferredReturnTypes.push_back(inputType);
return success();
}
Python中使用:
# Type inference runs automatically
result = my_dialect.MyOp(input_tensor)  # result type inferred from the input
Traits是编译时的属性:
def MyOp : MyDialect_Op<"my_op", [
Pure, // 无副作用
Commutative, // 可交换
SameOperandsAndResultType // 操作数和结果类型相同
]> {
...
}
Interfaces定义操作的行为协议:
// 定义接口
classMyOpInterface :public OpInterface<MyOpInterface, ...> {
public:
// 接口方法
Value getInput();
voidsetInput(Value input);
};
// The operation implements the interface
def MyOp : MyDialect_Op<"my_op", [MyOpInterface]> {
  let extraClassDeclaration = [{
    Value getInput() { return getOperand(0); }
    void setInput(Value input) { setOperand(0, input); }
  }];
}
Python中使用接口:
from mlir.ir import InferTypeOpInterface

with Context():
    op = create_some_op()
    # Try casting the op to the interface
    try:
        iface = InferTypeOpInterface(op)
        # Call an interface method
        return_types = iface.inferReturnTypes(...)
    except ValueError:
        print("Operation does not implement InferTypeOpInterface")
from mlir.ir import Context, Module

with Context() as ctx:
    module = Module.parse("""
module {
func.func @test(%arg0: i32) -> i32 {
%0 = arith.addi %arg0, %arg0 : i32
return %0 : i32
}
}
""")
    # Print the IR
    print(module)
    # Print the IR with debug (location) info
    print(module.operation.get_asm(enable_debug_info=True))
    # Verify the IR
    if module.operation.verify():
        print("IR is valid")
    else:
        print("IR is invalid")
# Parse and print IR
mlir-opt input.mlir
# Run optimization passes
mlir-opt input.mlir -canonicalize -cse
# Print debug info
mlir-opt input.mlir -mlir-print-debuginfo
# Lowering
mlir-opt input.mlir -convert-linalg-to-loops -lower-affine
# Run an explicit pass pipeline
mlir-opt input.mlir -pass-pipeline='builtin.module(func.func(canonicalize))'
from mlir.ir import Context, Module
from mlir.passmanager import PassManager
def debug_compilation(module_str):
    """Parse a module and run canonicalize+cse with full diagnostics.

    Args:
        module_str: Textual MLIR to parse.

    Returns:
        The transformed module.
    """
    with Context() as ctx:
        module = Module.parse(module_str)
        # Pass manager that prints IR around each pass.
        pm = PassManager.parse("builtin.module(canonicalize,cse)")
        pm.enable_ir_printing()
        # Collect per-pass statistics.
        pm.enable_statistics()
        # Collect per-pass timing.
        pm.enable_timing()
        pm.run(module.operation)
        return module
相关源码路径:
- mlir/lib/Bindings/Python/
- mlir/python/mlir/
- mlir/tools/mlir-tblgen/
- mlir/python/mlir/dialects/linalg/opdsl/
- mlir/examples/toy/
- mlir/include/mlir/IR/OpBase.td
- mlir/lib/Bindings/Python/IRCore.cpp
- mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp
- mlir/python/mlir/dialects/linalg/opdsl/lang/dsl.py

MLIR从Python DSL到Dialect的编译过程涉及多个层次和组件:
这种分层设计使得MLIR既保持了高性能(C++实现),又提供了灵活性(Python接口),同时通过TableGen减少了样板代码,使得定义新的Dialect和操作变得简单高效。
通过理解这个完整的流程,开发者可以:
MLIR的可扩展性和多层次特性使其成为构建现代编译器和代码生成工具的理想选择。