在今年使用openubmc适配atlas300ipro、300vpro、300v的时候,由于这三张卡的四元组信息是一致的,bmc无法正常区分这三张卡,在社区上提过一些帖子。后来参加Meetup@深圳,与一位大佬交流后受到了启发,可以通过新建一个对象单独对atlas卡再做带外管理获取boardid来解决这个问题,参加完会议回来后,进行了测试与实验,最终成功区分了。
具体步骤如下:
1.参考《Atlas 300V pro NPU卡信息显示不全》帖子中对300vpro卡的csr配置思路,可以正常识别并区分这几张卡,但是发现第一次启动时加载非常慢,实测在bios启动完成后还需要3分钟才能加载完成并显示,而且使用ipmitool fru list命令获取不到卡的fru信息。
2.这张卡的带外管理代码应该是闭源的,无法直接改动,所以新增一个组件和对象。新建组件参考帖子:【教学培训篇】新增组件
model.json:
{
"wzAtlasModel": {
"path": "/bmc/kepler/wzAtlasModel/${id}",
"interfaces": {
"bmc.kepler.wzapp.AtlasCard": {
"properties": {
"BoardID": {
"usage": [
"CSR"
]
}
}
}
},
"properties": {
"RefMCUChip": {
"usage": [
"CSR"
],
"baseType": "U8[]",
"refInterface": "bmc.kepler.Chip.BlockIO"
}
}
}
}
app.lua:
-- Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
--
-- this file licensed under the Mulan PSL v2.
-- You can use this software according to the terms and conditions of the Mulan PSL v2.
-- You may obtain a copy of Mulan PSL v2 at: http://license.coscl.org.cn/MulanPSL2
--
-- THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
-- PURPOSE.
-- See the Mulan PSL v2 for more details.
--
-- Author: auto generate
-- Create: 2025-11-24
-- Description: DO NOT EDIT; Code generated by "${project_name}_app.lua.mako"
local class = require 'mc.class'
local service = require 'wzapp.service'
local logging = require 'mc.logging'
local object_manage = require 'mc.mdb.object_manage'
local client = require 'wzapp.client'
local mc_admin = require 'mc.mc_admin'
local context = require 'mc.context'
local skynet = require 'skynet'
local crc8 = require 'mc.crc8'
local std_smbus_cfg = require 'std_smbus'
-- wzapp service class, created by passing the required 'wzapp.service' module to mc.class.
local wzapp = class(service)
--- Parse the dependency list in mds/service.json and check it against this
--- service's bus.
-- The service.json path is built from the APP_WORKING_DIRECTORY global.
function wzapp:check_dependencies()
    logging:notice("check dependencies start")

    local dependency_admin = mc_admin.new()
    local service_json = APP_WORKING_DIRECTORY .. '/mds/service.json'

    dependency_admin:parse_dependency(service_json)
    dependency_admin:check_dependency(self.bus)
end
--- Constructor hook; intentionally empty.
-- The tables used by this service are created lazily in register_mds_callback.
function wzapp:ctor() end
--- Service initialisation entry point.
-- Runs the parent init, then checks dependencies, then registers the
-- object add/delete callbacks, in that order.
function wzapp:init()
    logging:notice("wzapp:init start")

    local parent = self.super
    parent.init(self)

    self:check_dependencies()
    self:register_mds_callback()

    logging:notice("wzapp: init end")
end
--- Register add/delete callbacks for 'wzAtlasModel' objects.
--
-- On add: the object is remembered by position, a std_smbus instance is
-- created from its RefMCUChip property, and a polling task is forked.
-- On delete: the task record is flagged as stopped and both the task record
-- and the object are forgotten.
-- Objects of any other class are ignored.
function wzapp:register_mds_callback()
    self.wzatlasmodels = self.wzatlasmodels or {}
    self.atlascard_tasks = self.atlascard_tasks or {}

    local function on_added(class_name, object, position)
        if class_name ~= 'wzAtlasModel' then
            return
        end

        logging:notice("wzapp: register_mds_callback")
        self.wzatlasmodels[position] = object

        -- The task record is shared with the forked task: the delete
        -- callback flips `running` on this same table to stop it.
        local task_record = {
            smbus_obj = std_smbus_cfg.new(object['RefMCUChip'], 32),
            running = true
        }
        self.atlascard_tasks[position] = task_record

        skynet.fork(function()
            self:atlascard_task(position, object)
        end)
        logging:notice("wzapp: new std_smbus")
    end

    local function on_removed(class_name, _, position)
        if class_name ~= 'wzAtlasModel' then
            return
        end

        -- Stop the polling task, if one exists for this position.
        local task_record = self.atlascard_tasks[position]
        if task_record then
            task_record.running = false
            self.atlascard_tasks[position] = nil
        end

        -- Forget the object itself.
        self.wzatlasmodels[position] = nil
    end

    object_manage.on_add_object(self.bus, on_added)
    object_manage.on_delete_object(self.bus, on_removed)
end
--- Polling task for one Atlas card: reads the BoardID over SMBus and writes
--- it to the card's wzAtlasModel object.
--
-- The task exits when the delete callback clears `running` on its task
-- record, or when the object registered at `position` is no longer `object`.
--
-- The loop now sleeps between polls. Previously it re-issued the SMBus
-- request back-to-back, and a GetBoardid call that failed without yielding
-- would keep the coroutine from ever yielding.
--
-- @param position key used in self.atlascard_tasks / self.wzatlasmodels
-- @param object   the wzAtlasModel object whose BoardID property is updated
function wzapp:atlascard_task(position, object)
    local taskinfo = self.atlascard_tasks[position]
    if not taskinfo then
        return
    end

    -- skynet.sleep counts in ticks of 1/100 second.
    local STARTUP_DELAY_TICKS = 300
    local POLL_INTERVAL_TICKS = 100

    local smbus = taskinfo.smbus_obj
    local pos = tostring(position)
    local last_boardid

    skynet.sleep(STARTUP_DELAY_TICKS) -- startup delay
    logging:notice("atlascard_task start for position = " .. pos)

    while true do
        -- Exit conditions.
        if not taskinfo.running then
            logging:notice("task stopped by delete callback, exit task for position = " .. pos)
            break
        end
        if self.wzatlasmodels[position] ~= object then
            logging:notice("object changed/removed, exit task for position = " .. pos)
            break
        end

        -- BoardID: the call stays inside a closure so that a nil smbus or a
        -- missing method is caught by pcall as well.
        local ok_b, bid = pcall(function()
            return smbus:GetBoardid()
        end)
        if ok_b and bid then
            object.BoardID = bid.boardid
            -- Log only when the value changes to avoid flooding the log.
            if bid.boardid ~= last_boardid then
                last_boardid = bid.boardid
                logging:notice("BoardID[" .. pos .. "] = " .. tostring(bid.boardid))
            end
        else
            logging:notice("GetBoardid[" .. pos .. "] error: " .. tostring(bid))
        end

        -- Yield between polls so other coroutines can run and the bus is not
        -- hammered.
        skynet.sleep(POLL_INTERVAL_TICKS)
    end

    logging:notice("atlascard_task exit for position = " .. pos)
end
-- Module result: the wzapp service class.
return wzapp
std_smbus和smbus参考自 AtomGit | GitCode - 全球开发者的开源社区,开源代码托管平台 上的实现,需要在std_smbus.lua的实现基础上,修改并添加以下内容:
local std_smbus = { slave_address = 0xD8 } -- MCU I2C address of the 300V Pro / 300I Pro / 300V cards
--- Create a new std_smbus instance.
-- @param chip       stored on the instance as `chip`
-- @param buffer_len stored on the instance as `buffer_len`
-- @return a table with `std_smbus` set as its metatable
function std_smbus.new(chip, buffer_len)
    -- NOTE(review): keys look like opcodes (0x0F matches the opcode used by
    -- GetBoardid); presumably value 1 marks the opcode as supported - confirm.
    local capability_map = {
        [0x00] = 1,
        [0x04] = 1,
        [0x0F] = 1,
        [0x03] = 1,
    }

    local instance = {
        chip = chip,
        buffer_len = buffer_len,
        capability = capability_map,
        process_queue = {},
    }
    return setmetatable(instance, std_smbus)
end
--- Read the board ID from the card.
--
-- Sends a request (lun 0x80, opcode 0x000F, offset 0, length 2) through
-- _send_and_receive_request_in_frames and unpacks the reply with the
-- `<<boardid:16>>` pattern.
--
-- @return the result of the bs unpack; the board ID is in its `boardid` field
-- @raise an error with a descriptive message when there is no reply or the
--        reply is shorter than the 2 bytes needed for the 16-bit board ID
function std_smbus:GetBoardid()
    local BOARDID_LEN = 2

    local recv_data = self:_send_and_receive_request_in_frames({
        lun = 0x80,
        arg = 0x00,
        opcode = 0x000F,
        offset = 0x00,
        length = BOARDID_LEN,
        data = ''
    }, BOARDID_LEN)

    -- Fail with a clear message instead of the opaque "attempt to get length
    -- of a nil value" raised by the logging below.
    if not recv_data or #recv_data < BOARDID_LEN then
        error(string.format("GetBoardid: invalid response, expected %d bytes, got %s",
            BOARDID_LEN, recv_data and tostring(#recv_data) or "nil"))
    end

    logging:notice("[RAW][BoardID] len = " .. tostring(#recv_data))
    logging:notice("[RAW][BoardID] DEC = " .. dump_dec(recv_data))
    return bs.new([[<<boardid:16>>]]):unpack(recv_data, true)
end
我们还需要在mdb_interface组件添加对应的文件:
json/path/mdb/bmc/kepler/wzAtlasModel/wzAtlasModel.json:
{
"wzAtlasModel": {
"path": "/bmc/kepler/wzAtlasModel/${id}",
"interfaces": [
"bmc.kepler.wzapp.AtlasCard"
]
}
}
json/intf/mdb/bmc/kepler/wzapp/AtlasCard.json:
{
"bmc.kepler.wzapp.AtlasCard": {
"properties": {
"BoardID": {
"baseType": "U16",
"readOnly": false,
"description": "单板ID"
}
}
}
}
到这里,我们对mdb_interface和wzapp执行gen和build后,即可在资源树上看到新增的类和属性:
~ ~ $ mdbctl lsobj wzAtlasModel
wzAtlasModel_0101010102
~ ~ $ mdbctl lsprop wzAtlasModel_0101010102
bmc.kepler.Object.Properties
ClassName="wzAtlasModel"
ObjectIdentifier=[1,"1","1","0101010102"]
ObjectName="wzAtlasModel_0101010102"
bmc.kepler.wzapp.AtlasCard
BoardID=175
Private
RefMCUChip="$ref"
类和属性配置好后,我们在vpd中实例化。我的想法是先分别配置300ipro/300vpro/300v的connector,再通过boardid来判断在位的是哪一张atlas卡。注意,这里还需要把上一级connector传入的Slot和Container参数继续传递到下一级PCIeDevice对象中,这样才能够正确地显示位置和槽位信息。
将原有的14140130_19e5d500_02000100.sr文件修改为:
{
"FormatVersion": "3.00",
"DataVersion": "3.00",
"Unit": {
"Type": "PCIeCard",
"Name": "PCIeCard_1"
},
"ManagementTopology": {
"Anchor": {
"Buses": [
"I2cMux_Chan"
]
},
"I2cMux_Chan": {
"Chips": [
"Chip_Dmini"
]
}
},
"Objects": {
"Chip_Dmini": {
"OffsetWidth": 1,
"AddrWidth": 1,
"Address": 216,
"WriteTmout": 100,
"ReadTmout": 100,
"HealthStatus": 0,
"WriteRetryTimes": 2,
"ReadRetryTimes": 0
},
"wzAtlasModel":{
"RefMCUChip":"#/Chip_Dmini"
},
"Connector_npu_1": {
"Bom": "14140130",
"Slot": "${Slot}",
"Position": 1,
"Presence": "#/wzAtlasModel.BoardID |> expr( $1==171 ? 1 : 0)",
"Id": "19e5d500",
"AuxId": "171",
"Buses": [
"I2cMux_Chan"
],
"SystemId": "${SystemId}",
"ManagerId": "${ManagerId}",
"ChassisId": "${ChassisId}",
"Container": "${Container}",
"IdentifyMode": 2
},
"Connector_npu_2": {
"Bom": "14140130",
"Slot": "${Slot}",
"Position": 2,
"Presence": "#/wzAtlasModel.BoardID |> expr( $1==175 ? 1 : 0)",
"Id": "19e5d500",
"AuxId": "175",
"Buses": [
"I2cMux_Chan"
],
"SystemId": "${SystemId}",
"ManagerId": "${ManagerId}",
"ChassisId": "${ChassisId}",
"Container": "${Container}",
"IdentifyMode": 2
},
"Connector_npu_3": {
"Bom": "14140130",
"Slot": "${Slot}",
"Position": 3,
"Presence": "#/wzAtlasModel.BoardID |> expr( $1==182 ? 1 : 0)",
"Id": "19e5d500",
"AuxId": "182",
"Buses": [
"I2cMux_Chan"
],
"SystemId": "${SystemId}",
"ManagerId": "${ManagerId}",
"ChassisId": "${ChassisId}",
"Container": "${Container}",
"IdentifyMode": 2
}
}
}
新增3个sr文件及对应的soft文件(这里只展示300ipro的,其他卡的sr类似),其实这个sr就是原本的14140130_19e5d500_02000100.sr
14140130_19e5d500_171.sr:
{
"FormatVersion": "3.00",
"DataVersion": "3.00",
"Unit": {
"Type": "PCIeCard",
"Name": "PCIeCard_1"
},
"ManagementTopology": {
"Anchor": {
"Buses": [
"I2cMux_Chan"
]
},
"I2cMux_Chan": {
"Chips": [
"Chip_Dmini",
"Chip_Dmini_Elabel"
]
}
},
"Objects": {
"DftMcu_1": {
"Id": 172,
"Type": 2,
"Slot": "${Slot}",
"DeviceNum": 2,
"ItemName": "MCU Device Self Test",
"PrompteReady": "",
"PrompteFinish": ""
},
"DftMcu_2": {
"Id": 172,
"Type": 2,
"Slot": "${Slot}",
"DeviceNum": 5,
"ItemName": "MCU Log Clear Test",
"PrompteReady": "",
"PrompteFinish": ""
},
"DftMcu_3": {
"Id": 172,
"Type": 1,
"Slot": "${Slot}",
"DeviceNum": 7,
"ItemName": "MCU self interrupt Test",
"PrompteReady": "",
"PrompteFinish": ""
},
"Chip_Dmini": {
"OffsetWidth": 1,
"AddrWidth": 1,
"Address": 216,
"WriteTmout": 100,
"ReadTmout": 100,
"HealthStatus": 0,
"WriteRetryTimes": 2,
"ReadRetryTimes": 0
},
"Chip_Dmini_Elabel": {
"OffsetWidth": 1,
"AddrWidth": 1,
"Address": 216,
"WriteTmout": 100,
"ReadTmout": 100,
"HealthStatus": 0,
"WriteRetryTimes": 2,
"ReadRetryTimes": 0
},
"Entity_GPUCard": {
"Id": 11,
"Name": "PCIe Card",
"PowerState": 1,
"Presence": 1,
"Instance": 101
},
"wzAtlasModel":{
"RefMCUChip":"#/Chip_Dmini"
},
"PCIeDevice_1": {
"Segment": 1,
"DeviceName": "PCIe Card $ (NPU)",
"DiagnosticFault": 0,
"PredictiveFault": 0,
"FunctionClass": 9,
"LinkSpeedReduced": 0,
"CorrectableError": 0,
"UncorrectableError": 0,
"FatalError": 0,
"Position": "",
"SlotID": "${Slot}",
"Container": "${Container}",
"GroupPosition": "PCIeDevice_${GroupPosition}",
"DeviceType": 8,
"PCIeDeviceType": "SingleFunction",
"SlotType": "FullLength",
"FunctionProtocol": "PCIe",
"FunctionType": "Physical"
},
"FruData_NPUCard": {
"FruId": 1,
"StorageType": "MCU",
"FruDev": "#/Chip_Dmini_Elabel"
},
"Fru_NPUCard": {
"PcbVersion": ".A",
"FruId": 1,
"PowerState": 1,
"FruName": "Atlas 300I Pro",
"Health": 0,
"EepStatus": 1,
"Type": 8,
"FruDataId": "#/FruData_NPUCard"
},
"NPUCard_1":{
"Name": "<=/PCIeCard_1.Name",
"CardDescription": "<=/PCIeCard_1.Description",
"DeviceName": "<=/PCIeDevice_1.DeviceName",
"RefChip":"#/Chip_Dmini",
"RefEeprom":"#/Chip_Dmini_Elabel",
"RefFrudata": "#/FruData_NPUCard",
"Model": "Atlas_300I_Pro",
"SlotNumber": "${Slot}",
"PcbVersion": ".A",
"BoardID": 171,
"FirmwareVersion": "N/A",
"CardPartNumber": "03028DFH",
"SerialNumber":"<=/FruData_NPUCard.BoardSerialNumber"
},
"PCIeCard_1": {
"DeviceName": "<=/PCIeDevice_1.DeviceName",
"SlotID": "<=/PCIeDevice_1.SlotID",
"NodeID": "<=/PCIeDevice_1.SlotID |> string.format('PCIeCard%s',$1)",
"Health": "<=/Component_PCIeCard.Health",
"Name": "Atlas 300I Pro Inference Card",
"BoardName": "<=/FruData_NPUCard.BoardProductName;<=/FruData_NPUCard.BoardProductName |> string.cmp($1, '') |> expr($1 ? 'IT21PDDA' : $2)",
"Description": "#/PCIeCard_1.Name |> string.format('%s PCI-E 1*16x (HHHL)', $1)",
"FunctionClass": 9,
"VendorID": 6629,
"DeviceID": 54528,
"SubVendorID": 512,
"SubDeviceID": 256,
"Position": "<=/PCIeDevice_1.Position",
"LaneOwner": "<=/PCIeDevice_1.SocketID",
"FirmwareVersion": "#/NPUCard_1.FirmwareVersion",
"Manufacturer": "Huawei",
"PartNumber": "03028DFH",
"MaxFrameLen": 64,
"LinkSpeed": "N/A",
"LinkSpeedCapability": "N/A",
"PcbVersion": "#/NPUCard_1.PcbVersion",
"BoardID": "#/NPUCard_1.BoardID",
"DevBus": "<=/PCIeDevice_1.DevBus",
"DevDevice": "<=/PCIeDevice_1.DevDevice",
"SerialNumber": "<=/FruData_NPUCard.BoardSerialNumber",
"DevFunction": "<=/PCIeDevice_1.DevFunction"
},
"Event_PCIeCardUCE": {
"EventKeyId": "PCIeCard.PCIeCardUncorrectableErr",
"Condition": 1,
"LedFaultCode": "q$$"
},
"Event_PCIeCardCE": {
"EventKeyId": "PCIeCard.PCIeCardCEHardFailure",
"Condition": 1,
"LedFaultCode": "q$$"
},
"Event_PcieCardReplaceMntr": {
"EventKeyId": "PcieCard.PcieCardReplace",
"Condition": 1
},
"Event_PCIeLinkSpeed": {
"EventKeyId": "PCIeCard.PCIeCardLinkSpeedReduced",
"Condition": 1
},
"ThresholdSensor_GPUOutletTemp": {
"OwnerId": 32,
"OwnerLun": 0,
"EntityId": "<=/Entity_GPUCard.Id",
"EntityInstance": "<=/Entity_GPUCard.Instance",
"Initialization": 127,
"Capabilities": 104,
"SensorType": 1,
"ReadingType": 1,
"SensorName": "NPU${Slot} Outlet Temp",
"AssertMask": 0,
"DeassertMask": 0,
"ReadingMask": 2056,
"UpperNoncritical": 90,
"PositiveHysteresis": 2,
"Unit": 128,
"BaseUnit": 1,
"ModifierUnit": 0,
"Linearization": 0,
"M": 100,
"RBExp": 224,
"Analog": 1,
"NominalReading": 25,
"NormalMaximum": 0,
"NormalMinimum": 0,
"MaximumReading": 127,
"MinimumReading": 128,
"Reading": "<=/NPUCard_1.OutletTemperatureCelsius",
"ReadingStatus": "<=/NPUCard_1.OutletTemperatureCelsius |> expr($1 >= 255 ? 1 : 0)",
"SensorNumber": 255
},
"ThresholdSensor_GPUInletTemp": {
"OwnerId": 32,
"OwnerLun": 0,
"EntityId": "<=/Entity_GPUCard.Id",
"EntityInstance": "<=/Entity_GPUCard.Instance",
"Initialization": 127,
"Capabilities": 104,
"SensorType": 1,
"ReadingType": 1,
"SensorName": "NPU${Slot} Inlet Temp",
"AssertMask": 0,
"DeassertMask": 0,
"ReadingMask": 2056,
"UpperNoncritical": 90,
"PositiveHysteresis": 2,
"Unit": 128,
"BaseUnit": 1,
"ModifierUnit": 0,
"Linearization": 0,
"M": 100,
"RBExp": 224,
"Analog": 1,
"NominalReading": 25,
"NormalMaximum": 0,
"NormalMinimum": 0,
"MaximumReading": 127,
"MinimumReading": 128,
"Reading": "<=/NPUCard_1.InletTemperatureCelsius",
"ReadingStatus": "<=/NPUCard_1.InletTemperatureCelsius |> expr($1 >= 255 ? 1 : 0)"
},
"ThresholdSensor_GPUAICoreTemp": {
"OwnerId": 32,
"OwnerLun": 0,
"EntityId": "<=/Entity_GPUCard.Id",
"EntityInstance": "<=/Entity_GPUCard.Instance",
"Initialization": 127,
"Capabilities": 104,
"SensorType": 1,
"ReadingType": 1,
"SensorName": "NPU${Slot} AICORE Temp",
"AssertMask": 0,
"DeassertMask": 0,
"ReadingMask": 2056,
"UpperNoncritical": 105,
"PositiveHysteresis": 2,
"Unit": 128,
"BaseUnit": 1,
"ModifierUnit": 0,
"Linearization": 0,
"M": 100,
"RBExp": 224,
"Analog": 1,
"NominalReading": 25,
"NormalMaximum": 0,
"NormalMinimum": 0,
"MaximumReading": 127,
"MinimumReading": 128,
"Reading": "<=/NPUCard_1.Core0TemperatureCelsius",
"ReadingStatus": "<=/NPUCard_1.Core0TemperatureCelsius |> expr($1 >= 255 ? 1 : 0)"
},
"ThresholdSensor_GPUPower": {
"OwnerId": 32,
"OwnerLun": 0,
"EntityId": "<=/Entity_GPUCard.Id",
"EntityInstance": "<=/Entity_GPUCard.Instance",
"Initialization": 127,
"Capabilities": 104,
"SensorType": 11,
"ReadingType": 1,
"SensorName": "NPU${Slot} Power",
"AssertMask": 0,
"DeassertMask": 0,
"ReadingMask": 0,
"Unit": 0,
"BaseUnit": 6,
"ModifierUnit": 0,
"Linearization": 0,
"M": 12,
"RBExp": 240,
"Analog": 1,
"NominalReading": 0,
"NormalMaximum": 0,
"NormalMinimum": 0,
"MaximumReading": 255,
"MinimumReading": 0,
"Reading": "<=/NPUCard_1.PowerWatts |> expr($1 // 12)",
"ReadingStatus": "<=/NPUCard_1.PowerWatts |> expr($1 >= 16384 ? 1 : 0)"
},
"CoolingRequirement_1_65": {
"RequirementId": 65,
"Description": "Atlas 300I Pro芯片温度调速",
"MonitoringStatus": "<=/NPUCard_1.Core0TemperatureCelsius |> expr($1 == 32768 ? 1 : 0)",
"MonitoringValue": "<=/NPUCard_1.Core0TemperatureCelsius |> expr(($1 >= 255) ? 40 : ($1 & 255))",
"FailedValue": 80,
"TargetTemperatureCelsius": 87,
"MaxAllowedTemperatureCelsius": 95,
"SensorName": "#/ThresholdSensor_GPUAICoreTemp.SensorName"
},
"CoolingRequirement_1_66": {
"RequirementId": 66,
"Description": "Atlas 300I Pro出风口温度调速",
"MonitoringStatus": "<=/NPUCard_1.OutletTemperatureCelsius |> expr($1 == 32768 ? 1 : 0)",
"MonitoringValue": "<=/NPUCard_1.OutletTemperatureCelsius |> expr(($1 >= 255) ? 40 : ($1 & 255))",
"FailedValue": 80,
"TargetTemperatureCelsius": 80,
"MaxAllowedTemperatureCelsius": 85,
"SensorName": "#/ThresholdSensor_GPUOutletTemp.SensorName"
},
"CoolingRequirement_1_67": {
"RequirementId": 67,
"Description": "Atlas 300I Pro入风口温度调速",
"MonitoringStatus": "<=/NPUCard_1.InletTemperatureCelsius |> expr($1 == 32768 ? 1 : 0)",
"MonitoringValue": "<=/NPUCard_1.InletTemperatureCelsius |> expr(($1 >= 255) ? 40 : ($1 & 255))",
"FailedValue": 80,
"TargetTemperatureCelsius": 80,
"MaxAllowedTemperatureCelsius": 85,
"SensorName": "#/ThresholdSensor_GPUInletTemp.SensorName"
},
"Event_PCIeBandWidth": {
"EventKeyId": "PCIeCard.PCIeCardBandWidthDecreased",
"Condition": 1
},
"Event_AiCoreOverTemp": {
"EventKeyId": "PCIeCard.PCIeCardOverTemp",
"Condition": "#/ThresholdSensor_GPUAICoreTemp.UpperNoncritical",
"Hysteresis": "#/ThresholdSensor_GPUAICoreTemp.PositiveHysteresis"
},
"Event_OutletOverTemp": {
"EventKeyId": "PCIeCard.PCIeCardOverTemp",
"Condition": "#/ThresholdSensor_GPUOutletTemp.UpperNoncritical",
"Hysteresis": "#/ThresholdSensor_GPUOutletTemp.PositiveHysteresis"
},
"Event_InletOverTemp": {
"EventKeyId": "PCIeCard.PCIeCardOverTemp",
"Condition": "#/ThresholdSensor_GPUInletTemp.UpperNoncritical",
"Hysteresis": "#/ThresholdSensor_GPUInletTemp.PositiveHysteresis"
},
"Event_AirTempFail": {
"EventKeyId": "PcieCard.PCIeCardTempFail",
"Condition": 1
},
"Event_FirmwareFailure": {
"EventKeyId": "PcieCard.PCIeCardFirmwareFailure",
"Reading": "<=/NPUCard_1.FaultState |> expr(($1 & 8192) == 0 ? 0 : 1)",
"Condition": 1,
"OperatorId": 5,
"Component": "#/Component_PCIeCard",
"DescArg2": "#/PCIeDevice_1.SlotID",
"DescArg3": "NPU",
"DescArg5": "(SPI Flash)",
"DescArg6": "#/NPUCard_1.FaultCode"
},
"Event_InspectFail": {
"EventKeyId": "PcieCard.PCIeCardInspectionFailure",
"Reading": "<=/NPUCard_1.FaultState;<=/NPUCard_1.PowerWatts |> expr((($1 & 4) == 0) && (($2 & 32768) == 0) ? 0 : 1)",
"Condition": 0,
"OperatorId": 6,
"AdditionalInfo": "2",
"Component": "#/Component_PCIeCard",
"DescArg2": "#/PCIeDevice_1.SlotID",
"DescArg3": "#/PCIeCard_1.BoardName"
},
"Event_VoltFail": {
"EventKeyId": "PcieCard.PCIeCardAccessVoltFailure",
"Reading": "<=/NPUCard_1.FaultState |> expr(($1 & 256) == 0 ? 0 : 1)",
"Condition": 1,
"OperatorId": 5,
"Component": "#/Component_PCIeCard",
"DescArg2": "#/PCIeDevice_1.SlotID",
"DescArg3": "#/PCIeCard_1.BoardName",
"DescArg5": "#/NPUCard_1.FaultCode"
},
"Event_VoltUpper": {
"EventKeyId": "PcieCard.PCIeCardVoltageUpper",
"Reading": "<=/NPUCard_1.FaultState;<=/Entity_GPUCard.PowerState |> expr((($1 & 512) || (($2) == 0)) == 0 ? 0 : 1)",
"Condition": 1,
"OperatorId": 5,
"Component": "#/Component_PCIeCard",
"DescArg2": "#/PCIeDevice_1.SlotID",
"DescArg3": "#/PCIeCard_1.BoardName",
"DescArg4": "#/NPUCard_1.FaultCode"
},
"Event_VoltLower": {
"EventKeyId": "PcieCard.PCIeCardVoltageLower",
"Reading": "<=/NPUCard_1.FaultState;<=/Entity_GPUCard.PowerState |> expr((($1 & 1024) || (($2) == 0)) == 0 ? 0 : 1)",
"Condition": 1,
"OperatorId": 5,
"Component": "#/Component_PCIeCard",
"DescArg2": "#/PCIeDevice_1.SlotID",
"DescArg3": "#/PCIeCard_1.BoardName",
"DescArg4": "#/NPUCard_1.FaultCode"
},
"Event_HealthFail": {
"EventKeyId": "PcieCard.PCIeCardHealthFail",
"Reading": "<=/NPUCard_1.FaultState |> expr(($1 & 2097152) == 0 ? 0 : 1)",
"Condition": 1,
"OperatorId": 5,
"Component": "#/Component_PCIeCard",
"DescArg2": "#/PCIeDevice_1.SlotID",
"DescArg3": "#/PCIeCard_1.BoardName",
"DescArg4": "#/NPUCard_1.FaultCode"
},
"Event_MiniFail": {
"EventKeyId": "PcieCard.PCIeCardMiniFail",
"Reading": "<=/NPUCard_1.FaultState |> expr(($1 & 4194304) == 0 ? 0 : 1)",
"Condition": 1,
"OperatorId": 5,
"Component": "#/Component_PCIeCard",
"DescArg2": "#/PCIeDevice_1.SlotID",
"DescArg3": "#/PCIeCard_1.BoardName",
"DescArg4": "#/NPUCard_1.FaultCode"
},
"Event_PowerUpDown": {
"EventKeyId": "PcieCard.PCIeCardPowerUpDown",
"Reading": "<=/NPUCard_1.FaultState |> expr(($1 & 33554432) == 0 ? 0 : 1)",
"Condition": 1,
"OperatorId": 5,
"Component": "#/Component_PCIeCard",
"DescArg2": "#/PCIeDevice_1.SlotID",
"DescArg3": "#/PCIeCard_1.BoardName",
"DescArg4": "#/NPUCard_1.FaultCode"
},
"Event_PowerFail": {
"EventKeyId": "PcieCard.PCIeCardPowerFail",
"Reading": "<=/NPUCard_1.FaultState |> expr(($1 & 16) == 0 ? 0 : 1)",
"Condition": 1,
"OperatorId": 5,
"Component": "#/Component_PCIeCard",
"DescArg2": "#/PCIeDevice_1.SlotID",
"DescArg3": "#/PCIeCard_1.BoardName",
"DescArg4": "#/NPUCard_1.FaultCode"
},
"Event_HardwareFail": {
"EventKeyId": "PcieCard.PCIeCardHardwareFail",
"Reading": "<=/NPUCard_1.FaultState |> expr(($1 & 65536) == 0 ? 0 : 1)",
"Condition": 1,
"OperatorId": 5,
"Component": "#/Component_PCIeCard",
"DescArg1": "#/NPUCard_1.FaultCode",
"DescArg2": "#/PCIeDevice_1.SlotID",
"DescArg3": "#/PCIeCard_1.BoardName"
},
"Event_PwrFail": {
"EventKeyId": "PcieCard.PCIeCardPwrFail",
"Reading": "<=/NPUCard_1.FaultState |> expr(($1 & 128) == 0 ? 0 : 1)",
"Condition": 1,
"OperatorId": 5,
"Component": "#/Component_PCIeCard",
"DescArg2": "#/PCIeDevice_1.SlotID",
"DescArg3": "#/PCIeCard_1.BoardName",
"DescArg4": "#/NPUCard_1.FaultCode"
},
"Event_OverCurrent": {
"EventKeyId": "PcieCard.PCIeCardOverCurrent",
"Reading": "<=/NPUCard_1.FaultState |> expr(($1 & 1073741824) == 0 ? 0 : 1)",
"Condition": 1,
"OperatorId": 5,
"Component": "#/Component_PCIeCard",
"DescArg2": "#/PCIeDevice_1.SlotID",
"DescArg3": "#/PCIeCard_1.BoardName",
"DescArg4": "#/NPUCard_1.FaultCode"
}
}
}
最终效果:
1.web界面第一次加载不会缓慢,正常加载:
2.ipmi查询卡的fru信息正常:
疑问:请问一下,目前发现新增的对象的属性无法实时同步到另外一个对象的属性中。比如:
"ThresholdSensor_GPUAICoreTemp": {
"OwnerId": 32,
"OwnerLun": 0,
"EntityId": "<=/Entity_GPUCard.Id",
"EntityInstance": "<=/Entity_GPUCard.Instance",
"Initialization": 127,
"Capabilities": 104,
"SensorType": 1,
"ReadingType": 1,
"SensorName": "NPU${Slot} AICORE Temp",
"AssertMask": 0,
"DeassertMask": 0,
"ReadingMask": 2056,
"UpperNoncritical": 105,
"PositiveHysteresis": 2,
"Unit": 128,
"BaseUnit": 1,
"ModifierUnit": 0,
"Linearization": 0,
"M": 100,
"RBExp": 224,
"Analog": 1,
"NominalReading": 25,
"NormalMaximum": 0,
"NormalMinimum": 0,
"MaximumReading": 127,
"MinimumReading": 128,
"Reading": "<=/wzAtlasModel.Core0TemperatureCelsius",
"ReadingStatus": "<=/wzAtlasModel.Core0TemperatureCelsius |> expr($1 >= 255 ? 1 : 0)"
},
这样配置后,Reading只会记录wzAtlasModel.Core0TemperatureCelsius的default数据,不会随该属性的变化实时更新,请大佬们解答一下。

