#589 initial attempt at reboot recovery

This commit is contained in:
Mikayla
2025-02-08 20:35:04 +00:00
parent cbc84c5998
commit b3cf40a01a
10 changed files with 266 additions and 84 deletions

View File

@ -380,6 +380,18 @@ function coordinator.comms(version, nic, sv_watchdog)
_send_sv(PROTOCOL.SCADA_MGMT, MGMT_TYPE.CLOSE, {})
end
-- tell the supervisor that process initialization is done by sending the
-- current process control configuration (used for supervisor startup recovery)
---@param mode PROCESS process control mode
---@param burn_target number burn rate target
---@param charge_target number charge level target
---@param gen_target number generation rate target
---@param limits number[] unit burn rate limits
function public.send_ready(mode, burn_target, charge_target, gen_target, limits)
    -- payload order must match what the supervisor reads: mode, targets, then limits
    local ready_data = { mode, burn_target, charge_target, gen_target, limits }

    _send_sv(PROTOCOL.SCADA_CRDN, CRDN_TYPE.PROCESS_READY, ready_data)
end
-- send a facility command
---@param cmd FAC_COMMAND command
---@param option any? optional argument for the command (like waste mode)

View File

@ -139,6 +139,11 @@ function process.init(iocontrol, coord_comms)
log.info("PROCESS: loaded priority groups settings")
end
-- report to the supervisor all initial configuration data has been sent
-- startup resume can occur if needed
local p = ctl_proc
pctl.comms.send_ready(p.mode, p.burn_target, p.charge_target, p.gen_target, p.limits)
end
-- create a handle to process control for usage of commands that get acknowledgements

View File

@ -17,7 +17,7 @@ local max_distance = nil
local comms = {}
-- protocol/data versions (protocol/data independent changes tracked by util.lua version)
comms.version = "3.0.4"
comms.version = "3.0.5"
comms.api_version = "0.0.9"
---@enum PROTOCOL
@ -60,18 +60,19 @@ local MGMT_TYPE = {
-- packet types used with the SCADA_CRDN protocol
---@enum CRDN_TYPE
local CRDN_TYPE = {
INITIAL_BUILDS = 0, -- initial, complete builds packet to the coordinator
FAC_BUILDS = 1, -- facility RTU builds
FAC_STATUS = 2, -- state of facility and facility devices
FAC_CMD = 3, -- facility command
UNIT_BUILDS = 4, -- build of each reactor unit (reactor + RTUs)
UNIT_STATUSES = 5, -- state of each of the reactor units
UNIT_CMD = 6, -- command a reactor unit
API_GET_FAC = 7, -- API: get the facility general data
API_GET_FAC_DTL = 8, -- API: get (detailed) data for the facility app
API_GET_UNIT = 9, -- API: get reactor unit data
API_GET_CTRL = 10, -- API: get data for the control app
API_GET_PROC = 11, -- API: get data for the process app
API_GET_WASTE = 12 -- API: get data for the waste app
PROCESS_READY = 1, -- process init is complete + last set of info for supervisor startup recovery
FAC_BUILDS = 2, -- facility RTU builds
FAC_STATUS = 3, -- state of facility and facility devices
FAC_CMD = 4, -- facility command
UNIT_BUILDS = 5, -- build of each reactor unit (reactor + RTUs)
UNIT_STATUSES = 6, -- state of each of the reactor units
UNIT_CMD = 7, -- command a reactor unit
API_GET_FAC = 8, -- API: get the facility general data
API_GET_FAC_DTL = 9, -- API: get (detailed) data for the facility app
API_GET_UNIT = 10, -- API: get reactor unit data
API_GET_CTRL = 11, -- API: get data for the control app
API_GET_PROC = 12, -- API: get data for the process app
API_GET_WASTE = 13 -- API: get data for the waste app
}
---@enum ESTABLISH_ACK

View File

@ -5,6 +5,7 @@ local util = require("scada-common.util")
local unit = require("supervisor.unit")
local fac_update = require("supervisor.facility_update")
local plc = require("supervisor.session.plc")
local rsctl = require("supervisor.session.rsctl")
local svsessions = require("supervisor.session.svsessions")
@ -31,6 +32,17 @@ local START_STATUS = {
BLADE_MISMATCH = 2
}
-- progress of the reboot (startup) recovery sequence
---@enum RECOVERY_STATE
local RCV_STATE = {
INACTIVE = 0, -- initial value, before startup_recovery_init() has stored a boot state
PRIMED = 1, -- boot state stored by startup_recovery_init(), waiting on startup_recovery_start()
RUNNING = 2, -- configuration accepted by startup_recovery_start(), update() is attempting the resume
STOPPED = 3 -- recovery completed, failed, was not applicable, or was cancelled
}
local CHARGE_SCALER = 1000000 -- convert MFE to FE
local GEN_SCALER = 1000 -- convert kFE to FE
---@class facility_management
local facility = {}
@ -66,12 +78,15 @@ function facility.new(config)
-- redstone I/O control
io_ctl = nil, ---@type rs_controller
-- process control
recovery = RCV_STATE.INACTIVE, ---@type RECOVERY_STATE
recovery_boot_state = nil, ---@type sv_control_state|nil
-- must start as a table: one entry per unit is appended with table.insert() during setup
last_unit_states = {}, ---@type boolean[]
units_ready = false,
mode = PROCESS.INACTIVE,
last_mode = PROCESS.INACTIVE,
return_mode = PROCESS.INACTIVE,
mode_set = PROCESS.MAX_BURN,
start_fail = START_STATUS.OK,
mode = PROCESS.INACTIVE, ---@type PROCESS
last_mode = PROCESS.INACTIVE, ---@type PROCESS
return_mode = PROCESS.INACTIVE, ---@type PROCESS
mode_set = PROCESS.MAX_BURN, ---@type PROCESS
start_fail = START_STATUS.OK, ---@type START_STATUS
max_burn_combined = 0.0, -- maximum burn rate to clamp at
burn_target = 0.1, -- burn rate target for aggregate burn mode
charge_setpoint = 0, -- FE charge target setpoint
@ -101,8 +116,8 @@ function facility.new(config)
last_error = 0.0,
last_time = 0.0,
-- waste processing
waste_product = WASTE.PLUTONIUM,
current_waste_product = WASTE.PLUTONIUM,
waste_product = WASTE.PLUTONIUM, ---@type WASTE_PRODUCT
current_waste_product = WASTE.PLUTONIUM, ---@type WASTE_PRODUCT
pu_fallback = false,
sps_low_power = false,
disabled_sps = false,
@ -126,14 +141,16 @@ function facility.new(config)
imtx_faulted_times = { 0, 0, 0 }
}
--#region SETUP
-- provide self to facility update functions
local f_update = fac_update(self)
-- create units
for i = 1, config.UnitCount do
table.insert(self.units,
unit.new(i, self.cooling_conf.r_cool[i].BoilerCount, self.cooling_conf.r_cool[i].TurbineCount, config.ExtChargeIdling))
table.insert(self.units, unit.new(i, self.cooling_conf.r_cool[i].BoilerCount, self.cooling_conf.r_cool[i].TurbineCount, config.ExtChargeIdling))
table.insert(self.group_map, AUTO_GROUP.MANUAL)
table.insert(self.last_unit_states, false)
end
-- list for RTU session management
@ -149,6 +166,62 @@ function facility.new(config)
table.insert(self.test_tone_states, false)
end
--#endregion
-- PRIVATE FUNCTIONS --
-- validate an automatic process control configuration and store the valid parts<br>
-- nothing is stored (and ready is false) unless process control is currently inactive
---@param auto_cfg start_auto_config configuration
---@return boolean ready, number[] unit_limits
local function _auto_check_and_save(auto_cfg)
    local unit_count = config.UnitCount

    -- true if the value is a number that is no less than the minimum
    local function at_least(value, minimum)
        return (type(value) == "number") and value >= minimum
    end

    -- start from the limits the units currently report
    -- NOTE(review): lim_br100 is multiplied by 100 here; confirm this yields the same units set_burn_limit() takes
    local limits = {}
    for u = 1, unit_count do
        local ctl_inf = self.units[u].get_control_inf()
        limits[u] = ctl_inf.lim_br100 * 100
    end

    -- configuration is locked while a process is running
    if self.mode ~= PROCESS.INACTIVE then
        return false, limits
    end

    -- process mode: must be one of the selectable (non-inactive) modes
    local new_mode = auto_cfg.mode
    if (type(new_mode) == "number") and (new_mode > PROCESS.INACTIVE) and (new_mode <= PROCESS.GEN_RATE) then
        self.mode_set = new_mode
    end

    -- targets: invalid values are ignored, leaving the previous setting in place
    if at_least(auto_cfg.burn_target, 0.1) then
        self.burn_target = auto_cfg.burn_target
    end

    if at_least(auto_cfg.charge_target, 0) then
        self.charge_setpoint = auto_cfg.charge_target * CHARGE_SCALER
    end

    if at_least(auto_cfg.gen_target, 0) then
        self.gen_rate_setpoint = auto_cfg.gen_target * GEN_SCALER
    end

    -- per-unit burn rate limits: only considered if there is one entry per unit
    local cfg_limits = auto_cfg.limits
    if (type(cfg_limits) == "table") and (#cfg_limits == unit_count) then
        for u = 1, unit_count do
            local lim = cfg_limits[u]
            if at_least(lim, 0.1) then
                limits[u] = lim
                self.units[u].set_burn_limit(lim)
            end
        end
    end

    -- ready requires a selected mode that has a usable target
    local ready = self.mode_set > 0
    if ready then
        local selected = self.mode_set
        local missing_target =
            ((selected == PROCESS.CHARGE) and (self.charge_setpoint <= 0)) or
            ((selected == PROCESS.GEN_RATE) and (self.gen_rate_setpoint <= 0)) or
            ((selected == PROCESS.BURN_RATE) and (self.burn_target < 0.1))

        ready = not missing_target
    end

    return ready, limits
end
-- PUBLIC FUNCTIONS --
---@class facility
@ -239,6 +312,42 @@ function facility.new(config)
-- update (iterate) the facility management
function public.update()
-- attempt reboot recovery if in progress
if self.recovery == RCV_STATE.RUNNING then
-- try to start auto control
if self.recovery_boot_state.mode ~= nil and self.units_ready then
self.recovery_boot_state.mode = nil
self.mode = self.mode_set
log.info("FAC: process startup resume initiated")
end
local recovered = self.recovery_boot_state.mode == nil
-- restore manual control reactors
for i = 1, #self.units do
if self.recovery_boot_state.unit_states[i] and self.group_map[i] == AUTO_GROUP.MANUAL then
recovered = false
if self.units[i].get_control_inf().ready then
local plc_s = svsessions.get_reactor_session(i)
if plc_s ~= nil then
plc_s.in_queue.push_command(plc.PLC_S_CMDS.ENABLE)
log.info("FAC: startup resume enabling manually controlled reactor unit #" .. i)
-- only execute once
self.recovery_boot_state.unit_states[i] = nil
end
end
end
end
if recovered then
self.recovery = RCV_STATE.STOPPED
self.recovery_boot_state = nil
log.info("FAC: startup resume complete")
end
end
-- run process control and evaluate automatic SCRAM
f_update.pre_auto()
f_update.auto_control(config.ExtChargeIdling)
@ -267,6 +376,35 @@ function facility.new(config)
--#endregion
--#region Startup Recovery
-- store the control state loaded at boot and prime startup recovery<br>
-- has no effect if recovery has already left the inactive state
---@param state sv_control_state
function public.startup_recovery_init(state)
    if self.recovery ~= RCV_STATE.INACTIVE then return end

    self.recovery_boot_state = state
    self.recovery = RCV_STATE.PRIMED
end
-- attempt to begin startup recovery using the provided process configuration<br>
-- recovery is stopped unless it was primed with a boot state whose mode was an active process mode
---@param auto_cfg start_auto_config configuration
function public.startup_recovery_start(auto_cfg)
    local boot = self.recovery_boot_state

    local resumable = (self.recovery == RCV_STATE.PRIMED) and boot and
        (boot.mode ~= PROCESS.INACTIVE) and (boot.mode ~= PROCESS.SYSTEM_ALARM_IDLE)

    if not resumable then
        self.recovery = RCV_STATE.STOPPED
        return
    end

    -- only the ready flag is needed here, the returned limits are discarded
    local ready = _auto_check_and_save(auto_cfg)

    self.recovery = util.trinary(ready, RCV_STATE.RUNNING, RCV_STATE.STOPPED)
    log.info(util.c("FAC: startup resume ", util.trinary(self.recovery == RCV_STATE.RUNNING, "ready", "failed")))
end
-- used on certain coordinator commands to end reboot recovery (remain in current operational state)
function public.cancel_recovery()
    -- this is called for every such command, so only report a cancellation
    -- when recovery had not already ended (avoids a misleading log line per command)
    local was_pending = self.recovery ~= RCV_STATE.STOPPED

    self.recovery = RCV_STATE.STOPPED
    self.recovery_boot_state = nil

    if was_pending then
        log.info("FAC: process startup resume cancelled by user operation")
    end
end
--#endregion
--#region Commands
-- SCRAM all reactor units
@ -290,59 +428,13 @@ function facility.new(config)
function public.auto_stop() self.mode = PROCESS.INACTIVE end
-- set automatic control configuration and start the process
---@param auto_cfg sys_auto_config configuration
---@param auto_cfg start_auto_config configuration
---@return table response ready state (successfully started) and current configuration (after updating)
function public.auto_start(auto_cfg)
local charge_scaler = 1000000 -- convert MFE to FE
local gen_scaler = 1000 -- convert kFE to FE
local ready = false
local ready, limits = _auto_check_and_save(auto_cfg)
-- load up current limits
local limits = {}
for i = 1, config.UnitCount do
limits[i] = self.units[i].get_control_inf().lim_br100 * 100
end
-- only allow changes if not running
if self.mode == PROCESS.INACTIVE then
if (type(auto_cfg.mode) == "number") and (auto_cfg.mode > PROCESS.INACTIVE) and (auto_cfg.mode <= PROCESS.GEN_RATE) then
self.mode_set = auto_cfg.mode
end
if (type(auto_cfg.burn_target) == "number") and auto_cfg.burn_target >= 0.1 then
self.burn_target = auto_cfg.burn_target
end
if (type(auto_cfg.charge_target) == "number") and auto_cfg.charge_target >= 0 then
self.charge_setpoint = auto_cfg.charge_target * charge_scaler
end
if (type(auto_cfg.gen_target) == "number") and auto_cfg.gen_target >= 0 then
self.gen_rate_setpoint = auto_cfg.gen_target * gen_scaler
end
if (type(auto_cfg.limits) == "table") and (#auto_cfg.limits == config.UnitCount) then
for i = 1, config.UnitCount do
local limit = auto_cfg.limits[i]
if (type(limit) == "number") and (limit >= 0.1) then
limits[i] = limit
self.units[i].set_burn_limit(limit)
end
end
end
ready = self.mode_set > 0
if ((self.mode_set == PROCESS.CHARGE) and (self.charge_setpoint <= 0)) or
((self.mode_set == PROCESS.GEN_RATE) and (self.gen_rate_setpoint <= 0)) or
((self.mode_set == PROCESS.BURN_RATE) and (self.burn_target < 0.1)) then
ready = false
end
ready = ready and self.units_ready
if ready then self.mode = self.mode_set end
if ready and self.units_ready then
self.mode = self.mode_set
end
log.debug(util.c("FAC: process start ", util.trinary(ready, "accepted", "rejected")))
@ -351,8 +443,8 @@ function facility.new(config)
ready,
self.mode_set,
self.burn_target,
self.charge_setpoint / charge_scaler,
self.gen_rate_setpoint / gen_scaler,
self.charge_setpoint / CHARGE_SCALER,
self.gen_rate_setpoint / GEN_SCALER,
limits
}
end

View File

@ -650,8 +650,16 @@ function update.auto_safety()
end
end
-- update last mode and set next mode
-- advance the process mode: save the upcoming mode to the settings file if it
-- differs from the current one, then record the current mode as the last mode
function update.post_auto()
    local mode_changing = self.mode ~= next_mode

    if mode_changing then
        -- keep the stored state in step with the mode that is about to take effect
        settings.set("LastProcessState", next_mode)

        if not settings.save("/supervisor.settings") then
            log.warning("facility_update.post_auto(): failed to save supervisor settings file")
        end
    end

    -- right-hand side is evaluated first, so last_mode receives the outgoing mode
    self.last_mode, self.mode = self.mode, next_mode
end
@ -792,6 +800,7 @@ end
function update.unit_mgmt()
local insufficent_po_rate = false
local need_emcool = false
local write_state = false
for i = 1, #self.units do
local u = self.units[i]
@ -807,6 +816,22 @@ function update.unit_mgmt()
if (self.cooling_conf.fac_tank_mode > 0) and u.is_emer_cool_tripped() and (self.cooling_conf.fac_tank_defs[i] == 2) then
need_emcool = true
end
-- check for control state changes to save
if self.last_unit_states[i] ~= u.get_control_state() then
self.last_unit_states[i] = u.get_control_state()
write_state = true
end
end
-- record unit control states
if write_state then
settings.set("LastUnitStates", self.last_unit_states)
local saved = settings.save("/supervisor.settings")
if not saved then
log.warning("facility_update.unit_mgmt(): failed to save supervisor settings file")
end
end
-- update waste product

View File

@ -234,6 +234,23 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim
if pkt.type == CRDN_TYPE.INITIAL_BUILDS then
-- acknowledgement to coordinator receiving builds
self.acks.builds = true
elseif pkt.type == CRDN_TYPE.PROCESS_READY then
if pkt.length == 5 then
-- coordinator has sent all initial process data, power-on recovery is now possible
---@type start_auto_config
local config = {
mode = pkt.data[1], ---@type PROCESS
burn_target = pkt.data[2], ---@type number
charge_target = pkt.data[3], ---@type number
gen_target = pkt.data[4], ---@type number
limits = pkt.data[5] ---@type number[]
}
facility.startup_recovery_start(config)
else
log.debug(log_tag .. "CRDN process ready packet length mismatch")
end
elseif pkt.type == CRDN_TYPE.FAC_BUILDS then
-- acknowledgement to coordinator receiving builds
self.acks.fac_builds = true
@ -243,8 +260,11 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim
if cmd == FAC_COMMAND.SCRAM_ALL then
facility.scram_all()
facility.cancel_recovery()
_send(CRDN_TYPE.FAC_CMD, { cmd, true })
elseif cmd == FAC_COMMAND.STOP then
facility.cancel_recovery()
local was_active = facility.auto_is_active()
if was_active then
@ -253,15 +273,16 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim
_send(CRDN_TYPE.FAC_CMD, { cmd, was_active })
elseif cmd == FAC_COMMAND.START then
facility.cancel_recovery()
if pkt.length == 6 then
---@type sys_auto_config
---@diagnostic disable-next-line: missing-fields
---@class start_auto_config
local config = {
mode = pkt.data[2],
burn_target = pkt.data[3],
charge_target = pkt.data[4],
gen_target = pkt.data[5],
limits = pkt.data[6]
mode = pkt.data[2], ---@type PROCESS
burn_target = pkt.data[3], ---@type number
charge_target = pkt.data[4], ---@type number
gen_target = pkt.data[5], ---@type number
limits = pkt.data[6] ---@type number[]
}
_send(CRDN_TYPE.FAC_CMD, { cmd, table.unpack(facility.auto_start(config)) })
@ -313,8 +334,11 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim
local manual = facility.get_group(uid) == AUTO_GROUP.MANUAL
if cmd == UNIT_COMMAND.SCRAM then
facility.cancel_recovery()
out_queue.push_data(SV_Q_DATA.SCRAM, data)
elseif cmd == UNIT_COMMAND.START then
facility.cancel_recovery()
if manual then
out_queue.push_data(SV_Q_DATA.START, data)
else
@ -324,6 +348,8 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim
elseif cmd == UNIT_COMMAND.RESET_RPS then
out_queue.push_data(SV_Q_DATA.RESET_RPS, data)
elseif cmd == UNIT_COMMAND.SET_BURN then
facility.cancel_recovery()
if pkt.length == 3 then
if manual then
out_queue.push_data(SV_Q_DATA.SET_BURN, data)
@ -354,6 +380,8 @@ function coordinator.new_session(id, s_addr, i_seq_num, in_queue, out_queue, tim
log.debug(log_tag .. "CRDN unit command reset alarm missing alarm id")
end
elseif cmd == UNIT_COMMAND.SET_GROUP then
facility.cancel_recovery()
if (pkt.length == 3) and (type(pkt.data[3]) == "number") and
(pkt.data[3] >= AUTO_GROUP.MANUAL) and (pkt.data[3] <= AUTO_GROUP.BACKUP) then
facility.set_group(unit.get_id(), pkt.data[3])

View File

@ -61,7 +61,6 @@ function plc.new_session(id, s_addr, i_seq_num, reactor_id, in_queue, out_queue,
local log_tag = "plc_session(" .. id .. "): "
local self = {
commanded_state = false,
commanded_burn_rate = 0.0,
auto_cmd_token = 0,
ramping_rate = false,

View File

@ -147,6 +147,9 @@ local function main()
-- halve the rate heartbeat LED flash
local heartbeat_toggle = true
-- init startup recovery
sv_facility.startup_recovery_init(supervisor.boot_state)
-- event loop
while true do
local event, param1, param2, param3, param4, param5 = util.pull_event()

View File

@ -19,10 +19,21 @@ local config = {}
supervisor.config = config
-- load the supervisor configuration
-- load the supervisor configuration and startup state
function supervisor.load_config()
if not settings.load("/supervisor.settings") then return false end
---@class sv_control_state
local boot_state = {
mode = settings.get("LastProcessState"), ---@type PROCESS
unit_states = settings.get("LastUnitStates") ---@type boolean[]
}
-- only record boot state if likely valid
if type(boot_state.mode) == "number" and type(boot_state.unit_states) == "table" then
supervisor.boot_state = boot_state
end
config.UnitCount = settings.get("UnitCount")
config.CoolingConfig = settings.get("CoolingConfig")
config.FacilityTankMode = settings.get("FacilityTankMode")

View File

@ -917,6 +917,12 @@ function unit.new(reactor_id, num_boilers, num_turbines, ext_idle)
return status
end
-- get the control state held in the PLC session database<br>
-- reports false when no PLC session is linked to this unit
---@nodiscard
function public.get_control_state()
    local plc_session = self.plc_i

    if plc_session == nil then return false end

    return plc_session.get_db().control_state
end
-- get the current burn rate (actual rate)
---@nodiscard
function public.get_burn_rate()