combine coordinator supervisor connection event loop with main loop

This commit is contained in:
Mikayla Fischler 2023-07-11 13:32:26 -04:00
parent 1edee7f64b
commit a2e0999cea
2 changed files with 158 additions and 186 deletions

View File

@ -22,6 +22,8 @@ local SCADA_CRDN_TYPE = comms.SCADA_CRDN_TYPE
local UNIT_COMMAND = comms.UNIT_COMMAND local UNIT_COMMAND = comms.UNIT_COMMAND
local FAC_COMMAND = comms.FAC_COMMAND local FAC_COMMAND = comms.FAC_COMMAND
local LINK_TIMEOUT = 60.0
local coordinator = {} local coordinator = {}
-- request the user to select a monitor -- request the user to select a monitor
@ -227,9 +229,12 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
sv_seq_num = 0, sv_seq_num = 0,
sv_r_seq_num = nil, sv_r_seq_num = nil,
sv_config_err = false, sv_config_err = false,
connected = false,
last_est_ack = ESTABLISH_ACK.ALLOW, last_est_ack = ESTABLISH_ACK.ALLOW,
last_api_est_acks = {} last_api_est_acks = {},
est_start = 0,
est_last = 0,
est_tick_waiting = nil,
est_task_done = nil
} }
comms.set_trusted_range(range) comms.set_trusted_range(range)
@ -295,56 +300,30 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
---@class coord_comms ---@class coord_comms
local public = {} local public = {}
-- close the connection to the server -- try to connect to the supervisor if not already linked
function public.close() ---@param abort boolean? true to print out cancel info if not linked (use on program terminate)
sv_watchdog.cancel() ---@return boolean ok, boolean start_ui
self.sv_addr = comms.BROADCAST function public.try_connect(abort)
self.sv_linked = false local ok = true
self.sv_r_seq_num = nil local start_ui = false
iocontrol.fp_link_state(types.PANEL_LINK_STATE.DISCONNECTED)
_send_sv(PROTOCOL.SCADA_MGMT, SCADA_MGMT_TYPE.CLOSE, {})
end
-- attempt to connect to the subervisor if not self.sv_linked then
---@nodiscard if self.est_tick_waiting == nil then
---@param timeout_s number timeout in seconds self.est_start = util.time_s()
---@param tick_dmesg_waiting function callback to tick dmesg waiting self.est_last = self.est_start
---@param task_done function callback to show done on dmesg
---@return boolean sv_linked true if connected, false otherwise self.est_tick_waiting, self.est_task_done =
--- EVENT_CONSUMER: this function consumes events coordinator.log_comms_connecting("attempting to connect to configured supervisor on channel " .. svr_channel)
function public.sv_connect(timeout_s, tick_dmesg_waiting, task_done)
local clock = util.new_clock(1)
local start = util.time_s()
local terminated = false
_send_establish() _send_establish()
else
clock.start() self.est_tick_waiting(math.max(0, LINK_TIMEOUT - (util.time_s() - self.est_start)))
while (util.time_s() - start) < timeout_s and (not self.sv_linked) and (not self.sv_config_err) do
local event, p1, p2, p3, p4, p5 = util.pull_event()
if event == "timer" and clock.is_clock(p1) then
-- timed out attempt, try again
tick_dmesg_waiting(math.max(0, timeout_s - (util.time_s() - start)))
_send_establish()
clock.start()
elseif event == "timer" then
-- keep checking watchdog timers
apisessions.check_all_watchdogs(p1)
elseif event == "modem_message" then
-- handle message
local packet = public.parse_packet(p1, p2, p3, p4, p5)
public.handle_packet(packet)
elseif event == "terminate" then
terminated = true
break
end
end end
task_done(self.sv_linked) if abort or (util.time_s() - self.est_start) >= LINK_TIMEOUT then
self.est_task_done(false)
if terminated then if abort then
coordinator.log_comms("supervisor connection attempt cancelled by user") coordinator.log_comms("supervisor connection attempt cancelled by user")
elseif self.sv_config_err then elseif self.sv_config_err then
coordinator.log_comms("supervisor cooling configuration invalid, check supervisor config file") coordinator.log_comms("supervisor cooling configuration invalid, check supervisor config file")
@ -360,7 +339,32 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
end end
end end
return self.sv_linked ok = false
elseif self.sv_config_err then
coordinator.log_comms("supervisor cooling configuration invalid, check supervisor config file")
ok = false
elseif (util.time_s() - self.est_last) > 1.0 then
_send_establish()
self.est_last = util.time_s()
end
elseif self.est_tick_waiting ~= nil then
self.est_task_done(true)
self.est_tick_waiting = nil
self.est_task_done = nil
start_ui = true
end
return ok, start_ui
end
-- close the connection to the server
function public.close()
sv_watchdog.cancel()
self.sv_addr = comms.BROADCAST
self.sv_linked = false
self.sv_r_seq_num = nil
iocontrol.fp_link_state(types.PANEL_LINK_STATE.DISCONNECTED)
_send_sv(PROTOCOL.SCADA_MGMT, SCADA_MGMT_TYPE.CLOSE, {})
end end
-- send a facility command -- send a facility command
@ -426,7 +430,10 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
-- handle a packet -- handle a packet
---@param packet mgmt_frame|crdn_frame|capi_frame|nil ---@param packet mgmt_frame|crdn_frame|capi_frame|nil
---@return boolean close_ui
function public.handle_packet(packet) function public.handle_packet(packet)
local was_linked = self.sv_linked
if packet ~= nil then if packet ~= nil then
local l_chan = packet.scada_frame.local_channel() local l_chan = packet.scada_frame.local_channel()
local r_chan = packet.scada_frame.remote_channel() local r_chan = packet.scada_frame.remote_channel()
@ -436,7 +443,9 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
if l_chan ~= crd_channel then if l_chan ~= crd_channel then
log.debug("received packet on unconfigured channel " .. l_chan, true) log.debug("received packet on unconfigured channel " .. l_chan, true)
elseif r_chan == pkt_channel then elseif r_chan == pkt_channel then
if protocol == PROTOCOL.COORD_API then if not self.sv_linked then
log.debug("discarding pocket API packet before linked to supervisor")
elseif protocol == PROTOCOL.COORD_API then
---@cast packet capi_frame ---@cast packet capi_frame
-- look for an associated session -- look for an associated session
local session = apisessions.find_session(src_addr) local session = apisessions.find_session(src_addr)
@ -497,12 +506,12 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
-- check sequence number -- check sequence number
if self.sv_r_seq_num == nil then if self.sv_r_seq_num == nil then
self.sv_r_seq_num = packet.scada_frame.seq_num() self.sv_r_seq_num = packet.scada_frame.seq_num()
elseif self.connected and ((self.sv_r_seq_num + 1) ~= packet.scada_frame.seq_num()) then elseif self.sv_linked and ((self.sv_r_seq_num + 1) ~= packet.scada_frame.seq_num()) then
log.warning("sequence out-of-order: last = " .. self.sv_r_seq_num .. ", new = " .. packet.scada_frame.seq_num()) log.warning("sequence out-of-order: last = " .. self.sv_r_seq_num .. ", new = " .. packet.scada_frame.seq_num())
return return false
elseif self.sv_linked and src_addr ~= self.sv_addr then elseif self.sv_linked and src_addr ~= self.sv_addr then
log.debug("received packet from unknown computer " .. src_addr .. " while linked; channel in use by another system?") log.debug("received packet from unknown computer " .. src_addr .. " while linked; channel in use by another system?")
return return false
else else
self.sv_r_seq_num = packet.scada_frame.seq_num() self.sv_r_seq_num = packet.scada_frame.seq_num()
end end
@ -632,7 +641,37 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
end end
elseif protocol == PROTOCOL.SCADA_MGMT then elseif protocol == PROTOCOL.SCADA_MGMT then
---@cast packet mgmt_frame ---@cast packet mgmt_frame
if packet.type == SCADA_MGMT_TYPE.ESTABLISH then if self.sv_linked then
if packet.type == SCADA_MGMT_TYPE.KEEP_ALIVE then
-- keep alive request received, echo back
if packet.length == 1 then
local timestamp = packet.data[1]
local trip_time = util.time() - timestamp
if trip_time > 750 then
log.warning("coordinator KEEP_ALIVE trip time > 750ms (" .. trip_time .. "ms)")
end
-- log.debug("coordinator RTT = " .. trip_time .. "ms")
iocontrol.get_db().facility.ps.publish("sv_ping", trip_time)
_send_keep_alive_ack(timestamp)
else
log.debug("SCADA keep alive packet length mismatch")
end
elseif packet.type == SCADA_MGMT_TYPE.CLOSE then
-- handle session close
sv_watchdog.cancel()
self.sv_addr = comms.BROADCAST
self.sv_linked = false
self.sv_r_seq_num = nil
iocontrol.fp_link_state(types.PANEL_LINK_STATE.DISCONNECTED)
log.info("server connection closed by remote host")
else
log.debug("received unknown SCADA_MGMT packet type " .. packet.type)
end
elseif packet.type == SCADA_MGMT_TYPE.ESTABLISH then
-- connection with supervisor established -- connection with supervisor established
if packet.length == 2 then if packet.length == 2 then
local est_ack = packet.data[1] local est_ack = packet.data[1]
@ -662,6 +701,7 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
self.sv_addr = src_addr self.sv_addr = src_addr
self.sv_linked = true self.sv_linked = true
self.sv_r_seq_num = nil
self.sv_config_err = false self.sv_config_err = false
iocontrol.fp_link_state(types.PANEL_LINK_STATE.LINKED) iocontrol.fp_link_state(types.PANEL_LINK_STATE.LINKED)
@ -703,36 +743,6 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
else else
log.debug("SCADA_MGMT establish packet length mismatch") log.debug("SCADA_MGMT establish packet length mismatch")
end end
elseif self.sv_linked then
if packet.type == SCADA_MGMT_TYPE.KEEP_ALIVE then
-- keep alive request received, echo back
if packet.length == 1 then
local timestamp = packet.data[1]
local trip_time = util.time() - timestamp
if trip_time > 750 then
log.warning("coordinator KEEP_ALIVE trip time > 750ms (" .. trip_time .. "ms)")
end
-- log.debug("coordinator RTT = " .. trip_time .. "ms")
iocontrol.get_db().facility.ps.publish("sv_ping", trip_time)
_send_keep_alive_ack(timestamp)
else
log.debug("SCADA keep alive packet length mismatch")
end
elseif packet.type == SCADA_MGMT_TYPE.CLOSE then
-- handle session close
sv_watchdog.cancel()
self.sv_addr = comms.BROADCAST
self.sv_linked = false
self.sv_r_seq_num = nil
iocontrol.fp_link_state(types.PANEL_LINK_STATE.DISCONNECTED)
log.info("server connection closed by remote host")
else
log.debug("received unknown SCADA_MGMT packet type " .. packet.type)
end
else else
log.debug("discarding non-link SCADA_MGMT packet before linked") log.debug("discarding non-link SCADA_MGMT packet before linked")
end end
@ -743,6 +753,8 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
log.debug("received packet for unknown channel " .. r_chan, true) log.debug("received packet for unknown channel " .. r_chan, true)
end end
end end
return was_linked and not self.sv_linked
end end
-- check if the coordinator is still linked to the supervisor -- check if the coordinator is still linked to the supervisor

View File

@ -22,7 +22,7 @@ local sounder = require("coordinator.sounder")
local apisessions = require("coordinator.session.apisessions") local apisessions = require("coordinator.session.apisessions")
local COORDINATOR_VERSION = "v0.19.0" local COORDINATOR_VERSION = "v0.19.1"
local println = util.println local println = util.println
local println_ts = util.println_ts local println_ts = util.println_ts
@ -31,7 +31,6 @@ local log_graphics = coordinator.log_graphics
local log_sys = coordinator.log_sys local log_sys = coordinator.log_sys
local log_boot = coordinator.log_boot local log_boot = coordinator.log_boot
local log_comms = coordinator.log_comms local log_comms = coordinator.log_comms
local log_comms_connecting = coordinator.log_comms_connecting
local log_crypto = coordinator.log_crypto local log_crypto = coordinator.log_crypto
---------------------------------------- ----------------------------------------
@ -173,7 +172,7 @@ local function main()
local loop_clock = util.new_clock(MAIN_CLOCK) local loop_clock = util.new_clock(MAIN_CLOCK)
---------------------------------------- ----------------------------------------
-- start front panel -- start front panel & UI start function
---------------------------------------- ----------------------------------------
log_graphics("starting front panel UI...") log_graphics("starting front panel UI...")
@ -187,39 +186,9 @@ local function main()
return return
else log_graphics("front panel ready") end else log_graphics("front panel ready") end
---------------------------------------- -- start up the main UI
-- connect to the supervisor
----------------------------------------
-- attempt to connect to the supervisor or exit
local function init_connect_sv()
local tick_waiting, task_done = log_comms_connecting("attempting to connect to configured supervisor on channel " .. config.SVR_CHANNEL)
-- attempt to establish a connection with the supervisory computer
if not coord_comms.sv_connect(60, tick_waiting, task_done) then
log_sys("supervisor connection failed, shutting down...")
log.fatal("failed to connect to supervisor")
return false
end
return true
end
if not init_connect_sv() then
println("startup> failed to connect to supervisor")
log_sys("system shutdown")
return
else
log_sys("supervisor connected, proceeding to UI start")
end
----------------------------------------
-- start the UI
----------------------------------------
-- start up the UI
---@return boolean ui_ok started ok ---@return boolean ui_ok started ok
local function init_start_ui() local function start_main_ui()
log_graphics("starting main UI...") log_graphics("starting main UI...")
local draw_start = util.time_ms() local draw_start = util.time_ms()
@ -228,36 +197,29 @@ local function main()
if not ui_ok then if not ui_ok then
renderer.close_ui() renderer.close_ui()
log_graphics(util.c("main UI error: ", ui_message)) log_graphics(util.c("main UI error: ", ui_message))
println_ts("main UI creation failed")
log.fatal(util.c("main GUI render failed with error ", ui_message)) log.fatal(util.c("main GUI render failed with error ", ui_message))
else else
log_graphics("first main UI draw took " .. (util.time_ms() - draw_start) .. "ms") log_graphics("main UI draw took " .. (util.time_ms() - draw_start) .. "ms")
-- start clock
loop_clock.start()
end end
return ui_ok return ui_ok
end end
local ui_ok = init_start_ui()
---------------------------------------- ----------------------------------------
-- main event loop -- main event loop
---------------------------------------- ----------------------------------------
local link_failed = false
local ui_ok = true
local date_format = util.trinary(config.TIME_24_HOUR, "%X \x04 %A, %B %d %Y", "%r \x04 %A, %B %d %Y") local date_format = util.trinary(config.TIME_24_HOUR, "%X \x04 %A, %B %d %Y", "%r \x04 %A, %B %d %Y")
if ui_ok then -- start clock
-- start connection watchdog loop_clock.start()
conn_watchdog.feed()
log.debug("startup> conn watchdog started")
log_sys("system started successfully") log_sys("system started successfully")
end
-- main event loop -- main event loop
while ui_ok do while true do
local event, param1, param2, param3, param4, param5 = util.pull_event() local event, param1, param2, param3, param4, param5 = util.pull_event()
-- handle event -- handle event
@ -271,7 +233,6 @@ local function main()
if nic.is_modem(device) then if nic.is_modem(device) then
nic.disconnect() nic.disconnect()
log_sys("comms modem disconnected") log_sys("comms modem disconnected")
println_ts("wireless modem disconnected!")
-- close out UI -- close out UI
renderer.close_ui() renderer.close_ui()
@ -287,18 +248,13 @@ local function main()
if renderer.is_monitor_used(device) then if renderer.is_monitor_used(device) then
---@todo will be handled properly in #249 ---@todo will be handled properly in #249
-- "halt and catch fire" style handling -- "halt and catch fire" style handling
local msg = "lost a configured monitor, system will now exit" log_sys("lost a configured monitor, system will now exit")
println_ts(msg)
log_sys(msg)
break break
else else
log_sys("lost unused monitor, ignoring") log_sys("lost unused monitor, ignoring")
end end
elseif type == "speaker" then elseif type == "speaker" then
local msg = "lost alarm sounder speaker" log_sys("lost alarm sounder speaker")
println_ts(msg)
log_sys(msg)
iocontrol.fp_has_speaker(false) iocontrol.fp_has_speaker(false)
end end
end end
@ -309,15 +265,8 @@ local function main()
if type == "modem" then if type == "modem" then
if device.isWireless() then if device.isWireless() then
-- reconnected modem -- reconnected modem
nic.connect(device)
log_sys("comms modem reconnected") log_sys("comms modem reconnected")
println_ts("wireless modem reconnected.") nic.connect(device)
-- re-init system
if not init_connect_sv() then break end
ui_ok = init_start_ui()
iocontrol.fp_has_modem(true) iocontrol.fp_has_modem(true)
else else
log_sys("wired modem reconnected") log_sys("wired modem reconnected")
@ -326,10 +275,7 @@ local function main()
---@todo will be handled properly in #249 ---@todo will be handled properly in #249
-- not supported, system will exit on loss of in-use monitors -- not supported, system will exit on loss of in-use monitors
elseif type == "speaker" then elseif type == "speaker" then
local msg = "alarm sounder speaker reconnected" log_sys("alarm sounder speaker reconnected")
println_ts(msg)
log_sys(msg)
sounder.reconnect(device) sounder.reconnect(device)
iocontrol.fp_has_speaker(true) iocontrol.fp_has_speaker(true)
end end
@ -337,8 +283,25 @@ local function main()
elseif event == "timer" then elseif event == "timer" then
if loop_clock.is_clock(param1) then if loop_clock.is_clock(param1) then
-- main loop tick -- main loop tick
-- toggle heartbeat
iocontrol.heartbeat() iocontrol.heartbeat()
-- maintain connection
if nic.connected() then
local ok, start_ui = coord_comms.try_connect()
if not ok then
link_failed = true
log_sys("supervisor connection failed, shutting down...")
log.fatal("failed to connect to supervisor")
break
elseif start_ui then
log_sys("supervisor connected, proceeding to main UI start")
ui_ok = start_main_ui()
if not ui_ok then break end
end
end
-- iterate sessions -- iterate sessions
apisessions.iterate_all() apisessions.iterate_all()
@ -346,25 +309,19 @@ local function main()
apisessions.free_all_closed() apisessions.free_all_closed()
-- update date and time string for main display -- update date and time string for main display
if coord_comms.is_linked() then
iocontrol.get_db().facility.ps.publish("date_time", os.date(date_format)) iocontrol.get_db().facility.ps.publish("date_time", os.date(date_format))
end
loop_clock.start() loop_clock.start()
elseif conn_watchdog.is_timer(param1) then elseif conn_watchdog.is_timer(param1) then
-- supervisor watchdog timeout -- supervisor watchdog timeout
local msg = "supervisor server timeout" log_comms("supervisor server timeout")
log_comms(msg)
println_ts(msg)
-- close connection, UI, and stop sounder -- close connection, main UI, and stop sounder
coord_comms.close() coord_comms.close()
renderer.close_ui() renderer.close_ui()
sounder.stop() sounder.stop()
if nic.connected() then
-- try to re-connect to the supervisor
if not init_connect_sv() then break end
ui_ok = init_start_ui()
end
else else
-- a non-clock/main watchdog timer event -- a non-clock/main watchdog timer event
@ -377,22 +334,15 @@ local function main()
elseif event == "modem_message" then elseif event == "modem_message" then
-- got a packet -- got a packet
local packet = coord_comms.parse_packet(param1, param2, param3, param4, param5) local packet = coord_comms.parse_packet(param1, param2, param3, param4, param5)
coord_comms.handle_packet(packet)
-- check if it was a disconnect -- handle then check if it was a disconnect
if not coord_comms.is_linked() then if coord_comms.handle_packet(packet) then
log_comms("supervisor closed connection") log_comms("supervisor closed connection")
-- close connection, UI, and stop sounder -- close connection, UI, and stop sounder
coord_comms.close() coord_comms.close()
renderer.close_ui() renderer.close_ui()
sounder.stop() sounder.stop()
if nic.connected() then
-- try to re-connect to the supervisor
if not init_connect_sv() then break end
ui_ok = init_start_ui()
end
end end
elseif event == "monitor_touch" or event == "mouse_click" or event == "mouse_up" or elseif event == "monitor_touch" or event == "mouse_click" or event == "mouse_up" or
event == "mouse_drag" or event == "mouse_scroll" then event == "mouse_drag" or event == "mouse_scroll" then
@ -405,10 +355,17 @@ local function main()
-- check for termination request -- check for termination request
if event == "terminate" or ppm.should_terminate() then if event == "terminate" or ppm.should_terminate() then
println_ts("terminate requested, closing connections...") -- handle supervisor connection
link_failed = coord_comms.try_connect(true)
if coord_comms.is_linked() then
log_comms("terminate requested, closing supervisor connection...") log_comms("terminate requested, closing supervisor connection...")
else link_failed = true end
coord_comms.close() coord_comms.close()
log_comms("supervisor connection closed") log_comms("supervisor connection closed")
-- handle API sessions
log_comms("closing api sessions...") log_comms("closing api sessions...")
apisessions.close_all() apisessions.close_all()
log_comms("api sessions closed") log_comms("api sessions closed")
@ -421,6 +378,9 @@ local function main()
sounder.stop() sounder.stop()
log_sys("system shutdown") log_sys("system shutdown")
if link_failed then println_ts("failed to connect to supervisor") end
if not ui_ok then println_ts("main UI creation failed") end
println_ts("exited") println_ts("exited")
log.info("exited") log.info("exited")
end end