This commit is contained in:
@@ -0,0 +1,134 @@
|
||||
# the name by which the project can be referenced within Serena
|
||||
project_name: raptor
|
||||
|
||||
# list of languages for which language servers are started; choose from:
|
||||
# al angular ansible bash clojure
|
||||
# cpp cpp_ccls crystal csharp csharp_omnisharp
|
||||
# dart elixir elm erlang fortran
|
||||
# fsharp go groovy haskell haxe
|
||||
# hlsl html java json julia
|
||||
# kotlin lean4 lua luau markdown
|
||||
# matlab msl nix ocaml pascal
|
||||
# perl php php_phpactor powershell python
|
||||
# python_jedi python_ty r rego ruby
|
||||
# ruby_solargraph rust scala scss solidity
|
||||
# svelte swift systemverilog terraform toml
|
||||
# typescript typescript_vts vue yaml zig
|
||||
# (This list may be outdated. For the current list, see values of Language enum here:
|
||||
# https://github.com/oraios/serena/blob/main/src/solidlsp/ls_config.py
|
||||
# For some languages, there are alternative language servers, e.g. csharp_omnisharp, ruby_solargraph.)
|
||||
# Note:
|
||||
# - For C, use cpp
|
||||
# - For JavaScript, use typescript
|
||||
# - For Angular projects, use angular (subsumes typescript+html; requires `npm install` in the project root)
|
||||
# - For Svelte projects, use svelte (subsumes typescript/javascript for .svelte projects; requires npm)
|
||||
# - For SCSS / Sass / plain CSS, use scss (some-sass-language-server handles all three)
|
||||
# - For Free Pascal/Lazarus, use pascal
|
||||
# Special requirements:
|
||||
# Some languages require additional setup/installations.
|
||||
# See here for details: https://oraios.github.io/serena/01-about/020_programming-languages.html#language-servers
|
||||
# When using multiple languages, the first language server that supports a given file will be used for that file.
|
||||
# The first language is the default language and the respective language server will be used as a fallback.
|
||||
# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored.
|
||||
languages:
|
||||
- cpp
|
||||
- rust
|
||||
- python
|
||||
|
||||
# the encoding used by text files in the project
|
||||
# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings
|
||||
encoding: utf-8
|
||||
|
||||
# list of additional paths to ignore in this project.
|
||||
# Same syntax as gitignore, so you can use * and **.
|
||||
# Note: global ignored_paths from serena_config.yml are also applied additively.
|
||||
ignored_paths:
|
||||
|
||||
# list of mode names that are to be activated by default, overriding the setting in the global configuration.
|
||||
# The full set of modes to be activated is base_modes (from global config) + default_modes + added_modes.
|
||||
# If the setting is undefined/empty, the default_modes from the global configuration (serena_config.yml) apply.
|
||||
# Otherwise, this overrides the setting from the global configuration (serena_config.yml).
|
||||
# Therefore, you can set this to [] if you do not want the default modes defined in the global config to apply
|
||||
# for this project.
|
||||
# This setting can, in turn, be overridden by CLI parameters (--mode).
|
||||
# See https://oraios.github.io/serena/02-usage/050_configuration.html#modes
|
||||
default_modes:
|
||||
|
||||
# list of mode names to be activated additionally for this project, e.g. ["query-projects"]
|
||||
# The full set of modes to be activated is base_modes (from global config) + default_modes + added_modes.
|
||||
# See https://oraios.github.io/serena/02-usage/050_configuration.html#modes
|
||||
added_modes:
|
||||
|
||||
# list of tool names to exclude.
|
||||
# This extends the existing exclusions (e.g. from the global configuration)
|
||||
# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html
|
||||
excluded_tools: []
|
||||
|
||||
# list of tools to include that would otherwise be disabled (particularly optional tools that are disabled by default).
|
||||
# This extends the existing inclusions (e.g. from the global configuration).
|
||||
# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html
|
||||
included_optional_tools: []
|
||||
|
||||
# fixed set of tools to use as the base tool set (if non-empty), replacing Serena's default set of tools.
|
||||
# This cannot be combined with non-empty excluded_tools or included_optional_tools.
|
||||
# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html
|
||||
fixed_tools: []
|
||||
|
||||
# time budget (seconds) per tool call for the retrieval of additional symbol information
|
||||
# such as docstrings or parameter information.
|
||||
# This overrides the corresponding setting in the global configuration; see the documentation there.
|
||||
# If null or missing, use the setting from the global configuration.
|
||||
symbol_info_budget:
|
||||
|
||||
# The language backend to use for this project.
|
||||
# If not set, the global setting from serena_config.yml is used.
|
||||
# Valid values: LSP, JetBrains
|
||||
# Note: the backend is fixed at startup. If a project with a different backend
|
||||
# is activated post-init, an error will be returned.
|
||||
language_backend:
|
||||
|
||||
# line ending convention to use when writing source files.
|
||||
# Possible values: unset (use global setting), "lf", "crlf", or "native" (platform default)
|
||||
# This does not affect Serena's own files (e.g. memories and configuration files), which always use native line endings.
|
||||
line_ending:
|
||||
|
||||
# list of regex patterns which, when matched, mark a memory entry as read‑only.
|
||||
# Extends the list from the global configuration, merging the two lists.
|
||||
read_only_memory_patterns: []
|
||||
|
||||
# list of regex patterns for memories to completely ignore.
|
||||
# Matching memories will not appear in list_memories or activate_project output
|
||||
# and cannot be accessed via read_memory or write_memory.
|
||||
# To access ignored memory files, use the read_file tool on the raw file path.
|
||||
# Extends the list from the global configuration, merging the two lists.
|
||||
# Example: ["_archive/.*", "_episodes/.*"]
|
||||
ignored_memory_patterns: []
|
||||
|
||||
# advanced configuration option for setting language-server-specific options.
|
||||
# Maps the language key to the options.
|
||||
# Have a look at the docstring of the constructors of the LS implementations within solidlsp (e.g., for C# or PHP) to see which options are available.
|
||||
# No documentation on options means no options are available.
|
||||
ls_specific_settings: {}
|
||||
|
||||
# list of additional workspace folder paths for cross-package reference support (e.g. in monorepos).
|
||||
# Paths can be absolute or relative to the project root.
|
||||
# Each folder is registered as an LSP workspace folder, enabling language servers to discover
|
||||
# symbols and references across package boundaries.
|
||||
# Currently supported for: TypeScript.
|
||||
# Example:
|
||||
# additional_workspace_folders:
|
||||
# - ../sibling-package
|
||||
# - ../shared-lib
|
||||
additional_workspace_folders: []
|
||||
|
||||
# whether the project is in read-only mode
|
||||
# If set to true, all editing tools will be disabled and attempts to use them will result in an error
|
||||
# Added on 2025-04-18
|
||||
read_only: false
|
||||
|
||||
# whether to use the project's .gitignore files to ignore files
|
||||
ignore_all_files_in_gitignore: true
|
||||
|
||||
# initial prompt for the project. It will always be given to the LLM upon activating the project
|
||||
# (contrary to the memories, which are loaded on demand).
|
||||
initial_prompt: ''
|
||||
@@ -97,11 +97,17 @@ static spatial::SpatReconciliatorOp insertRowStripReconciliator(IRRewriter& rewr
|
||||
value.getLoc(),
|
||||
outputType,
|
||||
value,
|
||||
ValueRange {},
|
||||
rewriter.getStringAttr(kLogicalLayout),
|
||||
rewriter.getStringAttr(kRowStripLayout),
|
||||
rewriter.getDenseI64ArrayAttr(offsets),
|
||||
rewriter.getDenseI64ArrayAttr(sizes),
|
||||
rewriter.getStringAttr(kRowStripIndexMap));
|
||||
rewriter.getStringAttr(kRowStripIndexMap),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
static void materializeDenseUses(IRRewriter& rewriter,
|
||||
|
||||
@@ -233,15 +233,21 @@ def SpatReluPlanOp : SpatOp<"relu_plan", []> {
|
||||
}
|
||||
|
||||
def SpatReconciliatorOp : SpatOp<"reconciliator", []> {
|
||||
let summary = "Passive logical-to-physical layout selection record";
|
||||
let summary = "Logical-to-physical layout record or explicit fragment assembly";
|
||||
|
||||
let arguments = (ins
|
||||
SpatTensor:$input,
|
||||
Variadic<SpatTensor>:$fragments,
|
||||
StrAttr:$logicalLayout,
|
||||
StrAttr:$physicalLayout,
|
||||
DenseI64ArrayAttr:$fragmentOffsets,
|
||||
DenseI64ArrayAttr:$fragmentSizes,
|
||||
StrAttr:$indexMap
|
||||
StrAttr:$indexMap,
|
||||
OptionalAttr<StrAttr>:$mode,
|
||||
OptionalAttr<DenseI64ArrayAttr>:$fragmentOperandIndices,
|
||||
OptionalAttr<DenseI64ArrayAttr>:$fragmentStrides,
|
||||
OptionalAttr<StrAttr>:$conflictPolicy,
|
||||
OptionalAttr<StrAttr>:$coveragePolicy
|
||||
);
|
||||
|
||||
let results = (outs
|
||||
|
||||
@@ -383,7 +383,7 @@ LogicalResult SpatConcatOp::verify() {
|
||||
// Returns true when `layout` is a recognised logical layout name; "nchw" is the only one accepted here.
static bool isKnownLogicalLayout(StringRef layout) { return layout == "nchw"; }
|
||||
|
||||
static bool isKnownPhysicalLayout(StringRef layout) {
|
||||
return layout == "dense_nchw" || layout == "nchw_row_strip";
|
||||
return layout == "dense_nchw" || layout == "nchw_row_strip" || layout == "fragmented";
|
||||
}
|
||||
|
||||
static LogicalResult verifyPlanTensorTypes(Operation* op, Value input, Value output, StringRef kind) {
|
||||
@@ -437,7 +437,9 @@ LogicalResult SpatReluPlanOp::verify() {
|
||||
}
|
||||
|
||||
LogicalResult SpatReconciliatorOp::verify() {
|
||||
if (failed(verifyPlanTensorTypes(getOperation(), getInput(), getOutput(), "spat.reconciliator")))
|
||||
auto modeAttr = getModeAttr();
|
||||
bool isFragmentAssembly = modeAttr && modeAttr.getValue() == "fragment_assembly";
|
||||
if (!isFragmentAssembly && failed(verifyPlanTensorTypes(getOperation(), getInput(), getOutput(), "spat.reconciliator")))
|
||||
return failure();
|
||||
if (!isKnownLogicalLayout(getLogicalLayout()))
|
||||
return emitError("requires a known logical layout");
|
||||
@@ -452,23 +454,154 @@ LogicalResult SpatReconciliatorOp::verify() {
|
||||
auto sizes = getFragmentSizes();
|
||||
if (offsets.size() != sizes.size())
|
||||
return emitError("fragment offset and size arrays must have the same length");
|
||||
int64_t rank = logicalType.getRank();
|
||||
if (offsets.empty())
|
||||
return success();
|
||||
|
||||
int64_t rank = logicalType.getRank();
|
||||
if (rank <= 0 || offsets.size() % rank != 0)
|
||||
return emitError("fragment metadata must be a whole number of rank-sized fragments");
|
||||
|
||||
ArrayRef<int64_t> shape = logicalType.getShape();
|
||||
for (int64_t index = 0; index < static_cast<int64_t>(offsets.size()); ++index) {
|
||||
int64_t dim = index % rank;
|
||||
int64_t offset = offsets[index];
|
||||
int64_t size = sizes[index];
|
||||
if (offset < 0 || size < 0)
|
||||
return emitError("fragment offsets and sizes must be non-negative");
|
||||
int64_t logicalDim = shape[dim];
|
||||
if (!ShapedType::isDynamic(logicalDim) && offset + size > logicalDim)
|
||||
return emitError("fragment bounds must stay within the logical tensor shape");
|
||||
auto verifyBoundsOnly = [&](ArrayRef<int64_t> strideValues) -> LogicalResult {
|
||||
ArrayRef<int64_t> shape = logicalType.getShape();
|
||||
for (int64_t index = 0; index < static_cast<int64_t>(offsets.size()); ++index) {
|
||||
int64_t dim = index % rank;
|
||||
int64_t offset = offsets[index];
|
||||
int64_t size = sizes[index];
|
||||
int64_t stride = strideValues.empty() ? 1 : strideValues[index];
|
||||
if (offset < 0 || size < 0 || stride < 0)
|
||||
return emitError("fragment offsets, sizes, and strides must be non-negative");
|
||||
int64_t logicalDim = shape[dim];
|
||||
if (!ShapedType::isDynamic(logicalDim) && offset + size > logicalDim)
|
||||
return emitError("fragment bounds must stay within the logical tensor shape");
|
||||
if (stride != 1)
|
||||
return emitError("fragment assembly currently requires unit strides");
|
||||
}
|
||||
return success();
|
||||
};
|
||||
|
||||
if (!isFragmentAssembly) {
|
||||
if (failed(verifyBoundsOnly({})))
|
||||
return failure();
|
||||
if (!getFragments().empty())
|
||||
return emitError("legacy reconciliator does not accept extra fragment operands");
|
||||
if (getFragmentStridesAttr() || getConflictPolicyAttr() || getCoveragePolicyAttr())
|
||||
return emitError("legacy reconciliator does not accept fragment assembly attributes");
|
||||
return success();
|
||||
}
|
||||
|
||||
auto stridesAttr = getFragmentStridesAttr();
|
||||
auto operandIndicesAttr = getFragmentOperandIndicesAttr();
|
||||
if (!operandIndicesAttr)
|
||||
return emitError("fragment assembly reconciliator requires fragment operand indices");
|
||||
if (!stridesAttr)
|
||||
return emitError("fragment assembly reconciliator requires fragment strides");
|
||||
ArrayRef<int64_t> operandIndices = operandIndicesAttr.asArrayRef();
|
||||
ArrayRef<int64_t> strides = stridesAttr.asArrayRef();
|
||||
if (strides.size() != offsets.size())
|
||||
return emitError("fragment stride and offset arrays must have the same length");
|
||||
if (!getConflictPolicyAttr() || !getCoveragePolicyAttr())
|
||||
return emitError("fragment assembly reconciliator requires conflict and coverage policies");
|
||||
if (getConflictPolicy() != "disjoint")
|
||||
return emitError("fragment assembly reconciliator currently supports only conflict_policy=\"disjoint\"");
|
||||
if (getCoveragePolicy() != "complete" && getCoveragePolicy() != "partial")
|
||||
return emitError("fragment assembly reconciliator coverage_policy must be \"complete\" or \"partial\"");
|
||||
|
||||
SmallVector<Value> operands;
|
||||
operands.push_back(getInput());
|
||||
llvm::append_range(operands, getFragments());
|
||||
int64_t operandCount = static_cast<int64_t>(operands.size());
|
||||
int64_t fragmentCount = static_cast<int64_t>(operandIndices.size());
|
||||
if (operandCount == 0)
|
||||
return emitError("fragment assembly reconciliator requires at least one operand");
|
||||
if (static_cast<int64_t>(offsets.size()) != fragmentCount * rank)
|
||||
return emitError("fragment assembly metadata count must match operand count * result rank");
|
||||
if (failed(verifyBoundsOnly(strides)))
|
||||
return failure();
|
||||
|
||||
SmallVector<std::pair<SmallVector<int64_t, 4>, SmallVector<int64_t, 4>>, 8> slices;
|
||||
slices.reserve(static_cast<size_t>(fragmentCount));
|
||||
SmallVector<SmallVector<SmallVector<int64_t, 4>, 4>, 8> sizesByOperand(static_cast<size_t>(operandCount));
|
||||
for (int64_t fragmentIndex = 0; fragmentIndex < fragmentCount; ++fragmentIndex) {
|
||||
int64_t operandIndex = operandIndices[fragmentIndex];
|
||||
if (operandIndex < 0 || operandIndex >= operandCount)
|
||||
return emitError("fragment assembly operand index is out of range");
|
||||
|
||||
auto operandType = dyn_cast<RankedTensorType>(operands[operandIndex].getType());
|
||||
if (!operandType || !operandType.hasStaticShape())
|
||||
return emitError("fragment assembly reconciliator requires static ranked tensor operands");
|
||||
if (operandType.getRank() != rank)
|
||||
return emitError("fragment assembly reconciliator requires operand/result rank match");
|
||||
|
||||
SmallVector<int64_t, 4> fragmentOffsets;
|
||||
SmallVector<int64_t, 4> fragmentSizes;
|
||||
fragmentOffsets.reserve(rank);
|
||||
fragmentSizes.reserve(rank);
|
||||
for (int64_t dim = 0; dim < rank; ++dim) {
|
||||
int64_t flatIndex = fragmentIndex * rank + dim;
|
||||
fragmentOffsets.push_back(offsets[flatIndex]);
|
||||
fragmentSizes.push_back(sizes[flatIndex]);
|
||||
}
|
||||
|
||||
sizesByOperand[static_cast<size_t>(operandIndex)].push_back(fragmentSizes);
|
||||
|
||||
for (const auto& [existingOffsets, existingSizes] : slices) {
|
||||
bool overlaps = true;
|
||||
for (int64_t dim = 0; dim < rank; ++dim) {
|
||||
int64_t begin = fragmentOffsets[dim];
|
||||
int64_t end = begin + fragmentSizes[dim];
|
||||
int64_t existingBegin = existingOffsets[dim];
|
||||
int64_t existingEnd = existingBegin + existingSizes[dim];
|
||||
if (end <= existingBegin || existingEnd <= begin) {
|
||||
overlaps = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (overlaps)
|
||||
return emitError("fragment assembly reconciliator requires disjoint static slices");
|
||||
}
|
||||
slices.push_back({std::move(fragmentOffsets), std::move(fragmentSizes)});
|
||||
}
|
||||
|
||||
for (int64_t operandIndex = 0; operandIndex < operandCount; ++operandIndex) {
|
||||
if (sizesByOperand[static_cast<size_t>(operandIndex)].empty())
|
||||
return emitError("fragment assembly reconciliator requires every operand to contribute at least one fragment");
|
||||
|
||||
auto operandType = cast<RankedTensorType>(operands[operandIndex].getType());
|
||||
ArrayRef<int64_t> operandShape = operandType.getShape();
|
||||
auto& fragmentShapes = sizesByOperand[static_cast<size_t>(operandIndex)];
|
||||
if (fragmentShapes.size() == 1) {
|
||||
if (!llvm::equal(operandShape, fragmentShapes.front()))
|
||||
return emitError("single-fragment reconciliator operand shape must match declared fragment size");
|
||||
continue;
|
||||
}
|
||||
|
||||
ArrayRef<int64_t> fragmentShape = fragmentShapes.front();
|
||||
for (ArrayRef<int64_t> otherShape : fragmentShapes)
|
||||
if (!llvm::equal(fragmentShape, otherShape))
|
||||
return emitError("packed reconciliator operand requires equal fragment sizes per operand");
|
||||
if (llvm::equal(operandShape, fragmentShape))
|
||||
continue;
|
||||
if (!llvm::equal(operandShape.drop_front(), fragmentShape.drop_front()))
|
||||
return emitError("packed reconciliator operand must match fragment shape on non-packed dimensions");
|
||||
if (operandShape.front() != static_cast<int64_t>(fragmentShapes.size()) * fragmentShape.front())
|
||||
return emitError("packed reconciliator operand first dimension must equal fragment_count * fragment_size");
|
||||
}
|
||||
|
||||
if (getCoveragePolicy() == "complete") {
|
||||
int64_t covered = 0;
|
||||
int64_t logicalElements = 1;
|
||||
for (int64_t dimSize : logicalType.getShape()) {
|
||||
if (ShapedType::isDynamic(dimSize))
|
||||
return emitError("fragment assembly complete coverage requires static result shape");
|
||||
logicalElements *= dimSize;
|
||||
}
|
||||
for (const auto& [ignoredOffsets, fragmentSizes] : slices) {
|
||||
int64_t fragmentElements = 1;
|
||||
for (int64_t dimSize : fragmentSizes)
|
||||
fragmentElements *= dimSize;
|
||||
covered += fragmentElements;
|
||||
}
|
||||
if (covered != logicalElements)
|
||||
return emitError("fragment assembly complete coverage must cover the whole result exactly");
|
||||
}
|
||||
|
||||
return success();
|
||||
|
||||
+923
-2846
File diff suppressed because it is too large
Load Diff
+9510
File diff suppressed because it is too large
Load Diff
+7548
File diff suppressed because it is too large
Load Diff
+128
@@ -0,0 +1,128 @@
|
||||
--- src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MaterializeMergeSchedule.cpp 2026-06-24 18:51:29.043731129 +0000
|
||||
+++ src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MaterializeMergeSchedule.cpp 2026-06-24 18:51:29.026726895 +0000
|
||||
@@ -4112,104 +4112,8 @@
|
||||
Value originalOutput,
|
||||
Location loc);
|
||||
|
||||
-FailureOr<SmallVector<OpFoldResult, 4>> rematerializeProjectionIndexListForBatchHostOutput(
|
||||
- MaterializerState& state,
|
||||
- MaterializedClass& sourceClass,
|
||||
- ArrayRef<OpFoldResult> values,
|
||||
- IRMapping& mapper,
|
||||
- Location loc) {
|
||||
- SmallVector<OpFoldResult, 4> localized;
|
||||
- localized.reserve(values.size());
|
||||
- for (OpFoldResult value : values) {
|
||||
- FailureOr<OpFoldResult> remapped =
|
||||
- rematerializeIndexOpFoldResultInClass(state, sourceClass, value, loc, &mapper);
|
||||
- if (failed(remapped))
|
||||
- return failure();
|
||||
- localized.push_back(*remapped);
|
||||
- }
|
||||
- return localized;
|
||||
-}
|
||||
-
|
||||
-LogicalResult createProjectionAwareBatchHostInsert(MaterializerState& state,
|
||||
- MaterializedClass& sourceClass,
|
||||
- Value originalOutput,
|
||||
- Value payload,
|
||||
- Value destination,
|
||||
- ArrayRef<ProducerKey> keys,
|
||||
- Location loc) {
|
||||
- auto originalResult = dyn_cast<OpResult>(originalOutput);
|
||||
- if (!originalResult)
|
||||
- return failure();
|
||||
-
|
||||
- auto sourceBatch = dyn_cast_or_null<SpatComputeBatch>(originalResult.getOwner());
|
||||
- if (!sourceBatch || sourceBatch.getNumResults() == 0)
|
||||
- return failure();
|
||||
-
|
||||
- FailureOr<tensor::ParallelInsertSliceOp> projection =
|
||||
- getBatchResultProjectionInsert(sourceBatch, originalResult.getResultNumber());
|
||||
- if (failed(projection))
|
||||
- return failure();
|
||||
-
|
||||
- auto sourceLaneArg = sourceBatch.getLaneArgument();
|
||||
- if (!sourceLaneArg)
|
||||
- return failure();
|
||||
-
|
||||
- auto materializedBatch = dyn_cast<SpatScheduledComputeBatch>(sourceClass.op);
|
||||
- if (!materializedBatch)
|
||||
- return failure();
|
||||
-
|
||||
- auto materializedLaneArg = materializedBatch.getLaneArgument();
|
||||
- if (!materializedLaneArg)
|
||||
- return failure();
|
||||
-
|
||||
- if (keys.size() != sourceClass.cpus.size())
|
||||
- return failure();
|
||||
-
|
||||
- SmallVector<int64_t, 8> logicalLanes;
|
||||
- logicalLanes.reserve(keys.size());
|
||||
- for (ProducerKey key : keys) {
|
||||
- if (key.instance.op != sourceBatch.getOperation() || key.resultIndex != originalResult.getResultNumber())
|
||||
- return failure();
|
||||
- logicalLanes.push_back(key.instance.laneStart);
|
||||
- }
|
||||
-
|
||||
- IRMapping mapper;
|
||||
- Value logicalLane = createIndexedIndexValue(state,
|
||||
- sourceClass.op,
|
||||
- ArrayRef<int64_t>(logicalLanes),
|
||||
- *materializedLaneArg,
|
||||
- loc,
|
||||
- static_cast<int64_t>(sourceClass.cpus.size()),
|
||||
- /*allowExhaustiveTiledSearch=*/false);
|
||||
- mapper.map(*sourceLaneArg, logicalLane);
|
||||
-
|
||||
- FailureOr<SmallVector<OpFoldResult, 4>> offsets =
|
||||
- rematerializeProjectionIndexListForBatchHostOutput(
|
||||
- state, sourceClass, projection->getMixedOffsets(), mapper, loc);
|
||||
- if (failed(offsets))
|
||||
- return failure();
|
||||
- FailureOr<SmallVector<OpFoldResult, 4>> sizes =
|
||||
- rematerializeProjectionIndexListForBatchHostOutput(
|
||||
- state, sourceClass, projection->getMixedSizes(), mapper, loc);
|
||||
- if (failed(sizes))
|
||||
- return failure();
|
||||
- FailureOr<SmallVector<OpFoldResult, 4>> strides =
|
||||
- rematerializeProjectionIndexListForBatchHostOutput(
|
||||
- state, sourceClass, projection->getMixedStrides(), mapper, loc);
|
||||
- if (failed(strides))
|
||||
- return failure();
|
||||
-
|
||||
- tensor::ParallelInsertSliceOp::create(
|
||||
- state.rewriter, loc, payload, destination, *offsets, *sizes, *strides);
|
||||
- return success();
|
||||
-}
|
||||
-
|
||||
LogicalResult
|
||||
-setHostOutputValue(MaterializerState& state,
|
||||
- MaterializedClass& sourceClass,
|
||||
- Value originalOutput,
|
||||
- Value payload,
|
||||
- ArrayRef<ProducerKey> keys = {}) {
|
||||
+setHostOutputValue(MaterializerState& state, MaterializedClass& sourceClass, Value originalOutput, Value payload) {
|
||||
auto resultIt = sourceClass.hostOutputToResultIndex.find(originalOutput);
|
||||
if (resultIt == sourceClass.hostOutputToResultIndex.end())
|
||||
return sourceClass.op->emitError("missing host result slot for materialized output")
|
||||
@@ -4253,10 +4157,6 @@
|
||||
return batch.emitOpError("expected compute_batch output block argument while materializing batch output");
|
||||
|
||||
state.rewriter.setInsertionPointToStart(&inParallelOp.getRegion().front());
|
||||
- if (succeeded(createProjectionAwareBatchHostInsert(
|
||||
- state, sourceClass, originalOutput, payload, *outputArg, keys, payload.getLoc())))
|
||||
- return success();
|
||||
-
|
||||
createDim0ParallelInsertSlice(state, payload.getLoc(), payload, *outputArg, *laneArg);
|
||||
return success();
|
||||
}
|
||||
@@ -4276,7 +4176,7 @@
|
||||
|
||||
MaterializedClass& ownerClass = state.classes[ownerIt->second];
|
||||
if (sourceClass.id == ownerClass.id)
|
||||
- return setHostOutputValue(state, ownerClass, originalOutput, payload, keys);
|
||||
+ return setHostOutputValue(state, ownerClass, originalOutput, payload);
|
||||
|
||||
// Keep the old deadlock-free communication discipline: only scalar-to-scalar
|
||||
// host-owner forwarding is introduced here. Batch host publication remains on
|
||||
@@ -0,0 +1,295 @@
|
||||
#!/usr/bin/env python3.13
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
# Directory layout, derived from this file's resolved location:
# the script directory, its parent (validation helpers), and that directory's parent.
SCRIPT_DIR = Path(__file__).resolve().parent
VALIDATION_DIR = SCRIPT_DIR.parent
REPO_ROOT = VALIDATION_DIR.parent

# Put the validation directory first on sys.path (only once) so the helper
# modules imported below can be found.
if not (str(VALIDATION_DIR) in sys.path):
    sys.path.insert(0, str(VALIDATION_DIR))
|
||||
|
||||
from onnx_utils import _ONNX_TO_NP, onnx_io, write_inputs_to_memory_bin
|
||||
from validate_one import (
|
||||
MODE_COMPILE_ONLY,
|
||||
build_dump_ranges,
|
||||
parse_pim_simulator_outputs,
|
||||
run_pim_simulator,
|
||||
sanitize_output_name,
|
||||
validate_network,
|
||||
)
|
||||
from yolo_real_image_validation import save_tensor_csv
|
||||
|
||||
# Per-channel (R, G, B) mean and standard deviation used to normalise pixel
# values after they have been scaled to [0, 1].
IMAGENET_MEAN = np.array((0.485, 0.456, 0.406), dtype=np.float32)
IMAGENET_STD = np.array((0.229, 0.224, 0.225), dtype=np.float32)

# Default ONNX model locations below VALIDATION_DIR, selectable by network name.
DEFAULT_VGG_MODEL = VALIDATION_DIR.joinpath("networks", "vgg16", "depth_35", "vgg16_depth_35.onnx")
DEFAULT_RESNET_MODEL = VALIDATION_DIR.joinpath("networks", "resnet", "resnet18_torchvision.onnx")
|
||||
|
||||
|
||||
def resolve_default_paths():
    """Return the default tool locations, each built relative to ``REPO_ROOT``.

    The returned dict has the keys ``raptor_path``, ``onnx_include_dir`` and
    ``simulator_dir``.
    """
    relative_locations = {
        "raptor_path": ("build_release", "Release", "bin", "onnx-mlir"),
        "onnx_include_dir": ("onnx-mlir", "include"),
        "simulator_dir": ("backend-simulators", "pim", "pim-simulator"),
    }
    return {key: REPO_ROOT.joinpath(*parts) for key, parts in relative_locations.items()}
|
||||
|
||||
|
||||
def resolve_model_path(network: str | None, model: Path | None) -> Path:
|
||||
if model is not None:
|
||||
return model.resolve()
|
||||
if network == "resnet":
|
||||
return DEFAULT_RESNET_MODEL.resolve()
|
||||
if network == "vgg":
|
||||
return DEFAULT_VGG_MODEL.resolve()
|
||||
raise SystemExit("Pass --model or select a default with --network {resnet,vgg}.")
|
||||
|
||||
|
||||
def ensure_local_artifacts(args, model_path: Path):
    """Run ``validate_network`` on ``model_path`` with ``mode=MODE_COMPILE_ONLY``.

    Tool locations, crossbar/core settings, the command timeout and the
    verbosity flag are forwarded unchanged from ``args``. The result of
    ``validate_network`` is not returned.
    """
    call_options = {"network_onnx_path": model_path}
    # Attributes of ``args`` that map one-to-one onto keyword arguments.
    for option in (
        "raptor_path",
        "onnx_include_dir",
        "simulator_dir",
        "crossbar_size",
        "crossbar_count",
        "core_count",
        "command_timeout_seconds",
    ):
        call_options[option] = getattr(args, option)
    call_options["mode"] = MODE_COMPILE_ONLY
    call_options["verbose"] = args.verbose
    validate_network(**call_options)
|
||||
|
||||
|
||||
def ensure_existing_artifacts(model_dir: Path):
    """Check that the previously compiled artifacts exist under ``model_dir``.

    The runner binary and the PIM ``config.json`` / ``memory.bin`` files are
    required.

    Raises:
        FileNotFoundError: listing every required path that does not exist.
    """
    pim_dir = model_dir / "raptor" / "pim"
    expected = (
        model_dir / "runner" / "build" / "runner",
        pim_dir / "config.json",
        pim_dir / "memory.bin",
    )

    absent = []
    for candidate in expected:
        if not candidate.exists():
            absent.append(str(candidate))

    if not absent:
        return
    raise FileNotFoundError(
        "Missing compiled local artifacts. Re-run without --skip-compile or restore these paths:\n "
        + "\n ".join(absent)
    )
|
||||
|
||||
|
||||
def preprocess_classification_image(image_path: Path) -> tuple[Image.Image, np.ndarray]:
    """Load an image and build the normalised input tensor for classification.

    The image is converted to RGB, resized so its shorter side is 256 pixels
    (bilinear), centre-cropped to 224x224, scaled to [0, 1], normalised with
    ``IMAGENET_MEAN`` / ``IMAGENET_STD`` and rearranged to channel-first order.

    Args:
        image_path: Path of the image file to load.

    Returns:
        ``(image, tensor)`` where ``image`` is the full-size RGB image and
        ``tensor`` is a float32 array of shape ``(1, 3, 224, 224)``.
    """
    # Image.open is lazy and may keep the file handle open (notably for
    # multi-frame formats). convert() returns an independent in-memory image,
    # so the handle can be released as soon as the conversion is done.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    width, height = image.size
    scale = 256.0 / min(width, height)
    resized_size = (
        max(1, int(round(width * scale))),
        max(1, int(round(height * scale))),
    )
    resized = image.resize(resized_size, Image.Resampling.BILINEAR)

    # Centre crop to the 224x224 window.
    left = (resized.width - 224) // 2
    top = (resized.height - 224) // 2
    cropped = resized.crop((left, top, left + 224, top + 224))

    array = np.asarray(cropped, dtype=np.float32) / 255.0
    array = (array - IMAGENET_MEAN) / IMAGENET_STD
    # HWC -> CHW, then add the leading batch dimension.
    chw = np.transpose(array, (2, 0, 1))
    tensor = np.expand_dims(chw.astype(np.float32, copy=False), axis=0)
    return image, tensor
|
||||
|
||||
|
||||
def load_labels(labels_path: Path | None) -> list[str] | None:
|
||||
if labels_path is None:
|
||||
return None
|
||||
labels = [line.strip() for line in labels_path.read_text().splitlines()]
|
||||
return labels or None
|
||||
|
||||
|
||||
def softmax(values: np.ndarray) -> np.ndarray:
    """Return the softmax of ``values``, normalised over all of its elements.

    The maximum is subtracted before exponentiating so that large scores do
    not overflow.

    Raises:
        RuntimeError: if the sum of the exponentials is non-finite or not
            strictly positive.
    """
    exponentials = np.exp(values - np.max(values))
    total = exponentials.sum()
    normaliser_ok = math.isfinite(float(total)) and total > 0.0
    if not normaliser_ok:
        raise RuntimeError("Softmax received non-finite output scores.")
    return exponentials / total
|
||||
|
||||
|
||||
def decode_classification_output(output: np.ndarray, labels: list[str] | None, top_k: int):
    """Turn raw classifier scores into the ``top_k`` most probable classes.

    The scores are flattened, converted to probabilities with ``softmax`` and
    ranked from most to least probable.

    Args:
        output: Raw output scores; any shape, flattened before use.
        labels: Optional class names indexed by class id.
        top_k: Maximum number of predictions to return.

    Returns:
        A list of dicts with the keys ``index``, ``label`` (None when no label
        is available for that index) and ``probability``, ordered by
        decreasing probability. The list is empty when ``top_k`` is not
        positive.
    """
    scores = np.asarray(output, dtype=np.float64).reshape(-1)
    probabilities = softmax(scores)
    limit = min(top_k, probabilities.size)
    # A slice of [-0:] would select every class and a negative limit an
    # arbitrary tail, so a non-positive request returns no predictions.
    if limit <= 0:
        return []

    top_indices = np.argsort(probabilities)[-limit:][::-1]
    results = []
    for raw_index in top_indices:
        index = int(raw_index)
        label = labels[index] if labels is not None and 0 <= index < len(labels) else None
        results.append(
            {
                "index": index,
                "label": label,
                "probability": float(probabilities[index]),
            }
        )
    return results
|
||||
|
||||
|
||||
def render_result_line(result) -> str:
    """Format one prediction as ``"<name>: <percent>%"`` with two decimals.

    The name is the prediction's label, or ``class <index>`` when the label
    is missing or empty.
    """
    caption = result["label"] or f'class {result["index"]}'
    percent = result["probability"] * 100.0
    return f'{caption}: {percent:.2f}%'
|
||||
|
||||
|
||||
def draw_classification_panel(image: Image.Image, results, output_path: Path):
    """Draw the predictions on a copy of ``image`` and save it to ``output_path``.

    Each result becomes one line of white text on a black rounded rectangle
    near the top-left corner. ``"No predictions"`` is shown when ``results``
    is empty. The input image itself is not modified.
    """
    canvas = image.copy()
    pen = ImageDraw.Draw(canvas)
    captions = [render_result_line(entry) for entry in results] or ["No predictions"]

    margin, spacing = 10, 4
    anchor_x = anchor_y = 12

    # Measure every caption so the backdrop fits the widest line and all rows.
    extents = [pen.textbbox((0, 0), caption) for caption in captions]
    heights = [bottom - top for _left, top, _right, bottom in extents]
    widest = max(0, *(right - left for left, _top, right, _bottom in extents))

    backdrop_width = margin * 2 + widest
    backdrop_height = margin * 2 + sum(heights) + spacing * (len(captions) - 1)
    pen.rounded_rectangle(
        (anchor_x, anchor_y, anchor_x + backdrop_width, anchor_y + backdrop_height),
        radius=10,
        fill=(0, 0, 0),
    )

    # Stack the captions top to bottom inside the backdrop.
    cursor_y = anchor_y + margin
    for caption, height in zip(captions, heights):
        pen.text((anchor_x + margin, cursor_y), caption, fill=(255, 255, 255))
        cursor_y += height + spacing

    canvas.save(output_path)
|
||||
|
||||
|
||||
def run_reference_and_simulator(args, model_path: Path, tensor: np.ndarray):
    """Run one input tensor through the reference runner and the PIM simulator.

    All artifacts are located relative to the directory containing
    ``model_path``: the runner binary under ``runner/build``, the PIM files
    under ``raptor/pim`` and the working directories under
    ``classification_demo``.

    Args:
        args: Parsed command-line namespace; ``simulator_dir`` and
            ``command_timeout_seconds`` are read from it.
        model_path: Path to the ONNX model.
        tensor: Preprocessed input; its shape must equal the model's single
            input shape.

    Returns:
        A ``(reference_output, simulator_output)`` tuple.

    Raises:
        RuntimeError: If the model does not have exactly one input and one
            output, or the tensor shape does not match the model input.
        subprocess.CalledProcessError: If the reference runner exits with a
            non-zero status.
    """
    model_dir = model_path.parent
    runner_build_dir = model_dir / "runner" / "build"
    runner_path = runner_build_dir / "runner"
    pim_dir = model_dir / "raptor" / "pim"

    demo_dir = model_dir / "classification_demo"
    simulation_dir = demo_dir / "simulation"
    reference_dir = demo_dir / "reference"
    inputs_dir = demo_dir / "inputs"
    for work_dir in (simulation_dir, reference_dir, inputs_dir):
        work_dir.mkdir(parents=True, exist_ok=True)

    # This demo only supports single-input, single-output models.
    input_descriptors, output_descriptors = onnx_io(model_path)
    input_count = len(input_descriptors)
    if input_count != 1:
        raise RuntimeError(f"Expected one classification input tensor, found {input_count}")
    output_count = len(output_descriptors)
    if output_count != 1:
        raise RuntimeError(f"Expected one classification output tensor, found {output_count}")

    input_index, _input_name, _input_dtype, input_shape = input_descriptors[0]
    actual_shape = list(tensor.shape)
    if actual_shape != list(input_shape):
        raise RuntimeError(f"Preprocessed tensor shape {actual_shape} does not match model input {input_shape}")

    # Reference run: hand the tensor to the runner as CSV and let it save
    # its outputs as CSV files in reference_dir.
    input_csv = inputs_dir / "in0.csv"
    save_tensor_csv(tensor, input_csv)
    shape_argument = "x".join(map(str, tensor.shape))
    runner_cmd = [
        str(runner_path),
        f"--in{input_index}-csv-file",
        str(input_csv),
        f"--in{input_index}-shape",
        shape_argument,
        "--save-csv-dir",
        str(reference_dir),
    ]
    subprocess.run(runner_cmd, cwd=runner_build_dir, check=True)

    # Simulator run: place the tensor in the PIM memory image, then dump the
    # ranges that hold the model output.
    config_path = pim_dir / "config.json"
    write_inputs_to_memory_bin(pim_dir / "memory.bin", config_path, [tensor])
    dump_ranges = build_dump_ranges(config_path, output_descriptors)
    output_bin_path = simulation_dir / "out.bin"
    run_pim_simulator(
        args.simulator_dir,
        pim_dir,
        output_bin_path,
        dump_ranges,
        timeout_sec=args.command_timeout_seconds,
    )

    # Load both results for the single output tensor.
    output_index, output_name, output_dtype_code, output_shape = output_descriptors[0]
    output_dtype = np.dtype(_ONNX_TO_NP[output_dtype_code])
    reference_csv = reference_dir / f"output{output_index}_{sanitize_output_name(output_name)}.csv"
    reference_flat = np.loadtxt(reference_csv, delimiter=",", dtype=output_dtype)
    reference_output = reference_flat.reshape(output_shape)
    simulator_outputs = parse_pim_simulator_outputs(output_bin_path, output_descriptors)
    simulator_output = simulator_outputs[0]
    return reference_output, simulator_output
|
||||
|
||||
|
||||
def print_topk(title: str, results):
    """Print ``title`` followed by one numbered line per prediction.

    Each entry of ``results`` is a mapping with the keys ``"label"``,
    ``"index"`` and ``"probability"``. Ranks start at 1, a missing or empty
    label is shown as ``"class <index>"``, and the probability is printed
    as a percentage with two decimals.
    """
    print(title)
    for position, entry in enumerate(results, start=1):
        class_index = entry["index"]
        shown_name = entry["label"] or f"class {class_index}"
        percent = entry["probability"] * 100.0
        print(f" {position}. {shown_name} ({percent:.2f}%) [index={class_index}]")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the classification demo.

    Parses the arguments, makes sure the compiled artifacts exist (compiling
    them unless ``--skip-compile`` is given), preprocesses the image, runs
    the model through the reference runner and the simulator, prints both
    top-k rankings and the maximum absolute score difference, and saves the
    image annotated with the simulator's predictions to ``--output``.

    Exits through ``parser.error`` when ``--top-k`` is not a positive
    integer.
    """
    defaults = resolve_default_paths()

    parser = argparse.ArgumentParser(description="Run a VGG or ResNet ONNX model through the Raptor simulator and annotate the image with top classification results.")
    parser.add_argument("--model", type=Path, default=None)
    parser.add_argument("--network", choices=("resnet", "vgg"), default=None)
    parser.add_argument("--image", type=Path, required=True)
    parser.add_argument("--labels", type=Path, default=None)
    parser.add_argument("--output", type=Path, required=True)
    parser.add_argument("--raptor-path", type=Path, default=defaults["raptor_path"])
    parser.add_argument("--onnx-include-dir", type=Path, default=defaults["onnx_include_dir"])
    parser.add_argument("--simulator-dir", type=Path, default=defaults["simulator_dir"])
    parser.add_argument("--crossbar-size", type=int, default=2048)
    parser.add_argument("--crossbar-count", type=int, default=256)
    parser.add_argument("--core-count", type=int, default=1000)
    parser.add_argument("--top-k", type=int, default=5)
    parser.add_argument("--command-timeout-seconds", type=float, default=7200.0)
    parser.add_argument("--skip-compile", action="store_true")
    parser.add_argument("--verbose", action="store_true")
    args = parser.parse_args()

    # Reject a non-positive top-k before any expensive work is done: the
    # ranking uses a negative slice, so zero or a negative count would
    # silently select the wrong set of classes instead of failing.
    if args.top_k < 1:
        parser.error("--top-k must be a positive integer")

    # Normalise every path to an absolute one.
    args.model = resolve_model_path(args.network, args.model)
    args.image = args.image.resolve()
    args.output = args.output.resolve()
    args.labels = args.labels.resolve() if args.labels else None
    args.raptor_path = args.raptor_path.resolve()
    args.onnx_include_dir = args.onnx_include_dir.resolve()
    args.simulator_dir = args.simulator_dir.resolve()

    if not args.skip_compile:
        ensure_local_artifacts(args, args.model)
    else:
        ensure_existing_artifacts(args.model.parent)

    original_image, tensor = preprocess_classification_image(args.image)
    labels = load_labels(args.labels)
    reference_output, simulator_output = run_reference_and_simulator(args, args.model, tensor)
    reference_results = decode_classification_output(reference_output, labels, args.top_k)
    simulator_results = decode_classification_output(simulator_output, labels, args.top_k)

    print_topk("Reference top-k:", reference_results)
    print_topk("Simulator top-k:", simulator_results)

    # Compare the raw (pre-softmax) scores element by element.
    reference_scores = np.asarray(reference_output, dtype=np.float64).reshape(-1)
    simulator_scores = np.asarray(simulator_output, dtype=np.float64).reshape(-1)
    max_abs_diff = float(np.max(np.abs(reference_scores - simulator_scores)))
    print(f"Max absolute score diff: {max_abs_diff:.6e}")

    # The annotated image shows the simulator's predictions, not the
    # reference ones.
    args.output.parent.mkdir(parents=True, exist_ok=True)
    draw_classification_panel(original_image, simulator_results, args.output)
    print(f"Annotated image saved to {args.output}")
|
||||
|
||||
|
||||
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user