sr:vendor/Mellanox/CX-7/14140130_15b31021_15b30022.sr
调速策略描述:光模块温度获取正常时,网卡调速的目标温度为 90℃;光模块温度获取异常后,将网卡调速的目标温度调整为 60℃。
光模块温度正常时,网卡调速使用 Scanner_IBTemp 作为温度来源;但光模块温度异常时,却改用 NetworkAdapter_1.TemperatureCelsius。
两种情况下应当使用相同的温度来源。
一个是光模块的调速,一个是网卡的调速,两者使用不同的数据源才是正确的。
"CoolingRequirement_1_93": {
  "RequirementId": "${Slot} |> expr((93 << 8) | $1)",
  "Description": "CX7网卡调速",
  "SensorName": "#/ThresholdSensor_Temp.SensorName",
  "Enabled": "<=/CoolingRequirement_1_95.MonitoringStatus |> expr($1 == 0 ? true : false)",
  "MonitoringStatus": "<=/Scanner_IBTemp.Status",
  "MonitoringValue": "<=/Scanner_IBTemp.Value",
  "FailedValue": 80,
  "TargetTemperatureCelsius": 90,
  "MaxAllowedTemperatureCelsius": 100,
  "CustomSupported": false,
  "CustomTargetTemperatureCelsius": 255,
  "TargetTemperatureRangeCelsius": [],
  "SmartCoolingTargetTemperature": [],
  "ThresholdValue": [],
  "AlarmSpeed": []
},
"CoolingRequirement_1_94": {
  "RequirementId": "${Slot} |> expr((94 << 8) | $1)",
  "Description": "CX7网卡芯片调速(光模块异常)",
  "SensorName": "#/ThresholdSensor_Temp.SensorName",
  "Enabled": "<=/CoolingRequirement_1_95.MonitoringStatus |> expr($1 ~= 0 ? true : false)",
  "MonitoringStatus": "<=/NetworkAdapter_1.TemperatureStatus",
  "MonitoringValue": "<=/NetworkAdapter_1.TemperatureCelsius",
  "FailedValue": 80,
  "TargetTemperatureCelsius": 60,
  "MaxAllowedTemperatureCelsius": 100,
  "CustomSupported": false,
  "CustomTargetTemperatureCelsius": 255,
  "TargetTemperatureRangeCelsius": [],
  "SmartCoolingTargetTemperature": [],
  "ThresholdValue": [],
  "AlarmSpeed": []
},
"CoolingRequirement_1_95": {
  "RequirementId": "${Slot} |> expr((95 << 8) | $1)",
  "Description": "CX7网卡光模块温度调速",
  "SensorName": "#/ThresholdSensor_OpticalModuleTemp.SensorName",
  "Enabled": "<=/OpticalModule_1.Presence; <=/OpticalModule_2.Presence |> expr(($1 == 1 || $2 == 1) ? true : false)",
  "MonitoringStatus": "<=/OpticalModule_1.TemperatureCelsius; <=/OpticalModule_2.TemperatureCelsius |> expr($1 == 32768 || $2 == 32768 ? 1 : 0)",
  "MonitoringValue": "<=/OpticalModule_1.TemperatureCelsius; <=/OpticalModule_2.TemperatureCelsius |> expr(($1 >= 255 ? 0 : $1) >= ($2 >= 255 ? 0 : $2) ? ($1 >= 255 ? 0 : $1) : ($2 >= 255 ? 0 : $2))",
  "TargetTemperatureCelsius": 55,
  "MaxAllowedTemperatureCelsius": 75,
  "CustomSupported": false,
  "CustomTargetTemperatureCelsius": 255,
  "TargetTemperatureRangeCelsius": [],
  "SmartCoolingTargetTemperature": [60, 55, 65],
  "ThresholdValue": [],
  "AlarmSpeed": []
}
CoolingRequirement_1_93 是光模块温度获取正常时网卡的调速,CoolingRequirement_1_94 是光模块温度获取异常时网卡的调速。但两者的温度来源不一致:前者使用 Scanner_IBTemp,后者使用 NetworkAdapter_1.TemperatureCelsius。
这张卡支持通过 MCTP 获取温度,Scanner_IBTemp 和 NetworkAdapter_1.TemperatureCelsius 获取到的值应该是相同的;如果不同,说明已经出了问题,而问题是需要解决的。这不是两个数据源,而是同一个数据源。
可以在设备上对比一下这两个数值。两个来源应该都是由网卡提供的,只不过一个是通过 I2C 读取,一个是通过带外方式获取。
温度是一致的。只是配置得比较抽象,为同一个温度维护了两套状态和温度值。