#include "llvm/Support/ErrorHandling.h" #include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp" #define DEBUG_TYPE "PimCompilerOptions" namespace onnx_mlir { llvm::cl::opt pimEmissionTarget( llvm::cl::desc("[Optional] Choose PIM-related target to emit (once selected it will cancel the other targets):"), llvm::cl::values(clEnumVal(EmitSpatial, "Lower model to spatial IR")), llvm::cl::values(clEnumVal(EmitPim, "Lower model to PIM IR")), llvm::cl::values(clEnumVal(EmitPimBufferized, "Lower model to PIM IR and bufferize it")), llvm::cl::values(clEnumVal(EmitPimCodegen, "Lower model to PIM IR and generate code for PIM")), llvm::cl::init(EmitPimCodegen), llvm::cl::cat(OnnxMlirOptions)); llvm::cl::opt pimMergeScheduler("pim-merge-scheduler", llvm::cl::desc("Scheduler used by the Spatial merge-compute-nodes pass"), llvm::cl::values(clEnumValN(MergeSchedulerPeft, "peft", "Use PEFT scheduling")), llvm::cl::init(MergeSchedulerPeft), llvm::cl::cat(OnnxMlirOptions)); llvm::cl::opt pimMemoryReport( "pim-memory-report", llvm::cl::desc("Emit a human-readable PIM memory planning report"), llvm::cl::values(clEnumValN(PimMemoryReportNone, "none", "Do not emit any PIM memory planning report")), llvm::cl::values( clEnumValN(PimMemoryReportSummary, "summary", "Emit a concise slot reuse report with key offenders")), llvm::cl::values(clEnumValN(PimMemoryReportFull, "full", "Emit the full detailed PIM memory planning report")), llvm::cl::init(PimMemoryReportNone), llvm::cl::cat(OnnxMlirOptions)); llvm::cl::opt pimConvLowering( "pim-conv-lowering", llvm::cl::desc("Convolution lowering strategy for PIM"), llvm::cl::values(clEnumValN(PimConvLoweringAuto, "auto", "Select the Conv lowering strategy automatically")), llvm::cl::values(clEnumValN(PimConvLoweringLegacy, "legacy", "Use the legacy explicit-im2col Conv lowering")), llvm::cl::values(clEnumValN(PimConvLoweringDepthwise, "depthwise", "Force the depthwise-specialized Conv lowering")), llvm::cl::values( clEnumValN(PimConvLoweringPackedIm2Col, "packed-im2col", "Use explicit im2col with packed multi-position GEMM")), llvm::cl::values(clEnumValN(PimConvLoweringStreamedPatch, "streamed-patch", "Use streamed/chunked im2col rows without multi-position packing")), llvm::cl::values(clEnumValN(PimConvLoweringStreamedPacked, "streamed-packed", "Use streamed/chunked im2col rows with packed multi-position GEMM")), llvm::cl::values(clEnumValN(PimConvLoweringOutputChannelTiled, "output-channel-tiled", "Force Conv lowering that relies on Gemm output-channel tiling")), llvm::cl::values( clEnumValN(PimConvLoweringInputKTiled, "input-k-tiled", "Force Conv lowering that relies on Gemm K tiling")), llvm::cl::values(clEnumValN(PimConvLoweringTiled2D, "tiled-2d", "Force Conv lowering that relies on Gemm 2D K/C tiling")), llvm::cl::init(PimConvLoweringAuto), llvm::cl::cat(OnnxMlirOptions)); llvm::cl::opt pimOnlyCodegen("pim-only-codegen", llvm::cl::desc("Only generate code for PIM (assume input is already in bufferized PIM IR)"), llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions)); llvm::cl::opt pimDisableMemoryCoalescing("pim-disable-memory-coalescing", llvm::cl::desc("Skip the PIM memory coalescing pass (developer diagnostic option)"), llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions)); llvm::cl::opt useExperimentalConvImpl("use-experimental-conv-impl", llvm::cl::desc("Use experimental implementation for convolution"), llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions)); llvm::cl::opt pimConvIm2colMaxElements( "pim-conv-im2col-max-elements", llvm::cl::desc("Maximum number of im2col elements to materialize globally for one Conv before streaming/chunking"), llvm::cl::init(1ull << 20), llvm::cl::cat(OnnxMlirOptions)); llvm::cl::opt pimConvStreamChunkPositions( "pim-conv-stream-chunk-positions", llvm::cl::desc("Maximum number of Conv output positions to materialize in one streamed chunk"), llvm::cl::init(1024), llvm::cl::cat(OnnxMlirOptions)); llvm::cl::opt pimReportConvLowering("pim-report-conv-lowering", llvm::cl::desc("Emit a bounded Conv lowering report"), llvm::cl::init(true), llvm::cl::cat(OnnxMlirOptions)); llvm::cl::opt pimEmitJson("pim-emit-json", llvm::cl::desc("Also emit per-core JSON instruction files alongside binary .pim files"), llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions)); llvm::cl::opt pimDetectCommunicationDeadlock( "pim-detect-communication-deadlock", llvm::cl::desc("Expensively simulate the statically expanded PIM send/receive order at verification time and fail if a blocking communication deadlock is found"), llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions)); llvm::cl::opt pimMaterializeScalarFanoutGlobalOrder( "pim-materialize-scalar-fanout-global-order", llvm::cl::desc("Experimental expensive materializer mode: emit scalar-source fanout as globally ordered communication events instead of all-send fanout loops"), llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions)); llvm::cl::opt pimTraceCommunicationMaterialization( "pim-trace-communication-materialization", llvm::cl::desc("Emit verbose materializer-time diagnostics and provenance attributes for every Spatial communication op"), llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions)); llvm::cl::opt crossbarSize("crossbar-size", llvm::cl::desc("Width and height of a single crossbar"), llvm::cl::init(2)); llvm::cl::opt crossbarCountInCore("crossbar-count", llvm::cl::desc("Number of crossbars in each core"), llvm::cl::init(256)); llvm::cl::opt coresCount("core-count", llvm::cl::desc("Number of cores in the chip. Required for PIM compilation."), llvm::cl::init(-1)); llvm::cl::opt ignoreConcatError("ignore-concat-error", llvm::cl::desc("Ignore ConcatOp corner case: do not assert and do a simplification"), llvm::cl::init(false)); bool hasExplicitPimCoreCount() { return coresCount.getNumOccurrences() != 0; } void verifyExplicitPimCoreCount() { if (!hasExplicitPimCoreCount()) llvm::report_fatal_error("PIM compilation requires an explicit --core-count="); if (coresCount.getValue() <= 0) llvm::report_fatal_error("PIM compilation requires --core-count to be a positive integer"); } } // namespace onnx_mlir