combine coordinator supervisor connection event loop with main loop

This commit is contained in:
Mikayla Fischler 2023-07-11 13:32:26 -04:00
parent 1edee7f64b
commit a2e0999cea
2 changed files with 158 additions and 186 deletions

View File

@ -22,6 +22,8 @@ local SCADA_CRDN_TYPE = comms.SCADA_CRDN_TYPE
local UNIT_COMMAND = comms.UNIT_COMMAND
local FAC_COMMAND = comms.FAC_COMMAND
local LINK_TIMEOUT = 60.0
local coordinator = {}
-- request the user to select a monitor
@ -227,9 +229,12 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
sv_seq_num = 0,
sv_r_seq_num = nil,
sv_config_err = false,
connected = false,
last_est_ack = ESTABLISH_ACK.ALLOW,
last_api_est_acks = {}
last_api_est_acks = {},
est_start = 0,
est_last = 0,
est_tick_waiting = nil,
est_task_done = nil
}
comms.set_trusted_range(range)
@ -295,56 +300,30 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
---@class coord_comms
local public = {}
-- close the connection to the server
function public.close()
sv_watchdog.cancel()
self.sv_addr = comms.BROADCAST
self.sv_linked = false
self.sv_r_seq_num = nil
iocontrol.fp_link_state(types.PANEL_LINK_STATE.DISCONNECTED)
_send_sv(PROTOCOL.SCADA_MGMT, SCADA_MGMT_TYPE.CLOSE, {})
end
-- try to connect to the supervisor if not already linked
---@param abort boolean? true to print out cancel info if not linked (use on program terminate)
---@return boolean ok, boolean start_ui
function public.try_connect(abort)
local ok = true
local start_ui = false
-- attempt to connect to the subervisor
---@nodiscard
---@param timeout_s number timeout in seconds
---@param tick_dmesg_waiting function callback to tick dmesg waiting
---@param task_done function callback to show done on dmesg
---@return boolean sv_linked true if connected, false otherwise
--- EVENT_CONSUMER: this function consumes events
function public.sv_connect(timeout_s, tick_dmesg_waiting, task_done)
local clock = util.new_clock(1)
local start = util.time_s()
local terminated = false
if not self.sv_linked then
if self.est_tick_waiting == nil then
self.est_start = util.time_s()
self.est_last = self.est_start
self.est_tick_waiting, self.est_task_done =
coordinator.log_comms_connecting("attempting to connect to configured supervisor on channel " .. svr_channel)
_send_establish()
clock.start()
while (util.time_s() - start) < timeout_s and (not self.sv_linked) and (not self.sv_config_err) do
local event, p1, p2, p3, p4, p5 = util.pull_event()
if event == "timer" and clock.is_clock(p1) then
-- timed out attempt, try again
tick_dmesg_waiting(math.max(0, timeout_s - (util.time_s() - start)))
_send_establish()
clock.start()
elseif event == "timer" then
-- keep checking watchdog timers
apisessions.check_all_watchdogs(p1)
elseif event == "modem_message" then
-- handle message
local packet = public.parse_packet(p1, p2, p3, p4, p5)
public.handle_packet(packet)
elseif event == "terminate" then
terminated = true
break
end
else
self.est_tick_waiting(math.max(0, LINK_TIMEOUT - (util.time_s() - self.est_start)))
end
task_done(self.sv_linked)
if abort or (util.time_s() - self.est_start) >= LINK_TIMEOUT then
self.est_task_done(false)
if terminated then
if abort then
coordinator.log_comms("supervisor connection attempt cancelled by user")
elseif self.sv_config_err then
coordinator.log_comms("supervisor cooling configuration invalid, check supervisor config file")
@ -360,7 +339,32 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
end
end
return self.sv_linked
ok = false
elseif self.sv_config_err then
coordinator.log_comms("supervisor cooling configuration invalid, check supervisor config file")
ok = false
elseif (util.time_s() - self.est_last) > 1.0 then
_send_establish()
self.est_last = util.time_s()
end
elseif self.est_tick_waiting ~= nil then
self.est_task_done(true)
self.est_tick_waiting = nil
self.est_task_done = nil
start_ui = true
end
return ok, start_ui
end
-- close the connection to the server
function public.close()
sv_watchdog.cancel()
self.sv_addr = comms.BROADCAST
self.sv_linked = false
self.sv_r_seq_num = nil
iocontrol.fp_link_state(types.PANEL_LINK_STATE.DISCONNECTED)
_send_sv(PROTOCOL.SCADA_MGMT, SCADA_MGMT_TYPE.CLOSE, {})
end
-- send a facility command
@ -426,7 +430,10 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
-- handle a packet
---@param packet mgmt_frame|crdn_frame|capi_frame|nil
---@return boolean close_ui
function public.handle_packet(packet)
local was_linked = self.sv_linked
if packet ~= nil then
local l_chan = packet.scada_frame.local_channel()
local r_chan = packet.scada_frame.remote_channel()
@ -436,7 +443,9 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
if l_chan ~= crd_channel then
log.debug("received packet on unconfigured channel " .. l_chan, true)
elseif r_chan == pkt_channel then
if protocol == PROTOCOL.COORD_API then
if not self.sv_linked then
log.debug("discarding pocket API packet before linked to supervisor")
elseif protocol == PROTOCOL.COORD_API then
---@cast packet capi_frame
-- look for an associated session
local session = apisessions.find_session(src_addr)
@ -497,12 +506,12 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
-- check sequence number
if self.sv_r_seq_num == nil then
self.sv_r_seq_num = packet.scada_frame.seq_num()
elseif self.connected and ((self.sv_r_seq_num + 1) ~= packet.scada_frame.seq_num()) then
elseif self.sv_linked and ((self.sv_r_seq_num + 1) ~= packet.scada_frame.seq_num()) then
log.warning("sequence out-of-order: last = " .. self.sv_r_seq_num .. ", new = " .. packet.scada_frame.seq_num())
return
return false
elseif self.sv_linked and src_addr ~= self.sv_addr then
log.debug("received packet from unknown computer " .. src_addr .. " while linked; channel in use by another system?")
return
return false
else
self.sv_r_seq_num = packet.scada_frame.seq_num()
end
@ -632,7 +641,37 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
end
elseif protocol == PROTOCOL.SCADA_MGMT then
---@cast packet mgmt_frame
if packet.type == SCADA_MGMT_TYPE.ESTABLISH then
if self.sv_linked then
if packet.type == SCADA_MGMT_TYPE.KEEP_ALIVE then
-- keep alive request received, echo back
if packet.length == 1 then
local timestamp = packet.data[1]
local trip_time = util.time() - timestamp
if trip_time > 750 then
log.warning("coordinator KEEP_ALIVE trip time > 750ms (" .. trip_time .. "ms)")
end
-- log.debug("coordinator RTT = " .. trip_time .. "ms")
iocontrol.get_db().facility.ps.publish("sv_ping", trip_time)
_send_keep_alive_ack(timestamp)
else
log.debug("SCADA keep alive packet length mismatch")
end
elseif packet.type == SCADA_MGMT_TYPE.CLOSE then
-- handle session close
sv_watchdog.cancel()
self.sv_addr = comms.BROADCAST
self.sv_linked = false
self.sv_r_seq_num = nil
iocontrol.fp_link_state(types.PANEL_LINK_STATE.DISCONNECTED)
log.info("server connection closed by remote host")
else
log.debug("received unknown SCADA_MGMT packet type " .. packet.type)
end
elseif packet.type == SCADA_MGMT_TYPE.ESTABLISH then
-- connection with supervisor established
if packet.length == 2 then
local est_ack = packet.data[1]
@ -662,6 +701,7 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
self.sv_addr = src_addr
self.sv_linked = true
self.sv_r_seq_num = nil
self.sv_config_err = false
iocontrol.fp_link_state(types.PANEL_LINK_STATE.LINKED)
@ -703,36 +743,6 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
else
log.debug("SCADA_MGMT establish packet length mismatch")
end
elseif self.sv_linked then
if packet.type == SCADA_MGMT_TYPE.KEEP_ALIVE then
-- keep alive request received, echo back
if packet.length == 1 then
local timestamp = packet.data[1]
local trip_time = util.time() - timestamp
if trip_time > 750 then
log.warning("coordinator KEEP_ALIVE trip time > 750ms (" .. trip_time .. "ms)")
end
-- log.debug("coordinator RTT = " .. trip_time .. "ms")
iocontrol.get_db().facility.ps.publish("sv_ping", trip_time)
_send_keep_alive_ack(timestamp)
else
log.debug("SCADA keep alive packet length mismatch")
end
elseif packet.type == SCADA_MGMT_TYPE.CLOSE then
-- handle session close
sv_watchdog.cancel()
self.sv_addr = comms.BROADCAST
self.sv_linked = false
self.sv_r_seq_num = nil
iocontrol.fp_link_state(types.PANEL_LINK_STATE.DISCONNECTED)
log.info("server connection closed by remote host")
else
log.debug("received unknown SCADA_MGMT packet type " .. packet.type)
end
else
log.debug("discarding non-link SCADA_MGMT packet before linked")
end
@ -743,6 +753,8 @@ function coordinator.comms(version, nic, crd_channel, svr_channel, pkt_channel,
log.debug("received packet for unknown channel " .. r_chan, true)
end
end
return was_linked and not self.sv_linked
end
-- check if the coordinator is still linked to the supervisor

View File

@ -22,7 +22,7 @@ local sounder = require("coordinator.sounder")
local apisessions = require("coordinator.session.apisessions")
local COORDINATOR_VERSION = "v0.19.0"
local COORDINATOR_VERSION = "v0.19.1"
local println = util.println
local println_ts = util.println_ts
@ -31,7 +31,6 @@ local log_graphics = coordinator.log_graphics
local log_sys = coordinator.log_sys
local log_boot = coordinator.log_boot
local log_comms = coordinator.log_comms
local log_comms_connecting = coordinator.log_comms_connecting
local log_crypto = coordinator.log_crypto
----------------------------------------
@ -173,7 +172,7 @@ local function main()
local loop_clock = util.new_clock(MAIN_CLOCK)
----------------------------------------
-- start front panel
-- start front panel & UI start function
----------------------------------------
log_graphics("starting front panel UI...")
@ -187,39 +186,9 @@ local function main()
return
else log_graphics("front panel ready") end
----------------------------------------
-- connect to the supervisor
----------------------------------------
-- attempt to connect to the supervisor or exit
local function init_connect_sv()
local tick_waiting, task_done = log_comms_connecting("attempting to connect to configured supervisor on channel " .. config.SVR_CHANNEL)
-- attempt to establish a connection with the supervisory computer
if not coord_comms.sv_connect(60, tick_waiting, task_done) then
log_sys("supervisor connection failed, shutting down...")
log.fatal("failed to connect to supervisor")
return false
end
return true
end
if not init_connect_sv() then
println("startup> failed to connect to supervisor")
log_sys("system shutdown")
return
else
log_sys("supervisor connected, proceeding to UI start")
end
----------------------------------------
-- start the UI
----------------------------------------
-- start up the UI
-- start up the main UI
---@return boolean ui_ok started ok
local function init_start_ui()
local function start_main_ui()
log_graphics("starting main UI...")
local draw_start = util.time_ms()
@ -228,36 +197,29 @@ local function main()
if not ui_ok then
renderer.close_ui()
log_graphics(util.c("main UI error: ", ui_message))
println_ts("main UI creation failed")
log.fatal(util.c("main GUI render failed with error ", ui_message))
else
log_graphics("first main UI draw took " .. (util.time_ms() - draw_start) .. "ms")
-- start clock
loop_clock.start()
log_graphics("main UI draw took " .. (util.time_ms() - draw_start) .. "ms")
end
return ui_ok
end
local ui_ok = init_start_ui()
----------------------------------------
-- main event loop
----------------------------------------
local link_failed = false
local ui_ok = true
local date_format = util.trinary(config.TIME_24_HOUR, "%X \x04 %A, %B %d %Y", "%r \x04 %A, %B %d %Y")
if ui_ok then
-- start connection watchdog
conn_watchdog.feed()
log.debug("startup> conn watchdog started")
-- start clock
loop_clock.start()
log_sys("system started successfully")
end
-- main event loop
while ui_ok do
while true do
local event, param1, param2, param3, param4, param5 = util.pull_event()
-- handle event
@ -271,7 +233,6 @@ local function main()
if nic.is_modem(device) then
nic.disconnect()
log_sys("comms modem disconnected")
println_ts("wireless modem disconnected!")
-- close out UI
renderer.close_ui()
@ -287,18 +248,13 @@ local function main()
if renderer.is_monitor_used(device) then
---@todo will be handled properly in #249
-- "halt and catch fire" style handling
local msg = "lost a configured monitor, system will now exit"
println_ts(msg)
log_sys(msg)
log_sys("lost a configured monitor, system will now exit")
break
else
log_sys("lost unused monitor, ignoring")
end
elseif type == "speaker" then
local msg = "lost alarm sounder speaker"
println_ts(msg)
log_sys(msg)
log_sys("lost alarm sounder speaker")
iocontrol.fp_has_speaker(false)
end
end
@ -309,15 +265,8 @@ local function main()
if type == "modem" then
if device.isWireless() then
-- reconnected modem
nic.connect(device)
log_sys("comms modem reconnected")
println_ts("wireless modem reconnected.")
-- re-init system
if not init_connect_sv() then break end
ui_ok = init_start_ui()
nic.connect(device)
iocontrol.fp_has_modem(true)
else
log_sys("wired modem reconnected")
@ -326,10 +275,7 @@ local function main()
---@todo will be handled properly in #249
-- not supported, system will exit on loss of in-use monitors
elseif type == "speaker" then
local msg = "alarm sounder speaker reconnected"
println_ts(msg)
log_sys(msg)
log_sys("alarm sounder speaker reconnected")
sounder.reconnect(device)
iocontrol.fp_has_speaker(true)
end
@ -337,8 +283,25 @@ local function main()
elseif event == "timer" then
if loop_clock.is_clock(param1) then
-- main loop tick
-- toggle heartbeat
iocontrol.heartbeat()
-- maintain connection
if nic.connected() then
local ok, start_ui = coord_comms.try_connect()
if not ok then
link_failed = true
log_sys("supervisor connection failed, shutting down...")
log.fatal("failed to connect to supervisor")
break
elseif start_ui then
log_sys("supervisor connected, proceeding to main UI start")
ui_ok = start_main_ui()
if not ui_ok then break end
end
end
-- iterate sessions
apisessions.iterate_all()
@ -346,25 +309,19 @@ local function main()
apisessions.free_all_closed()
-- update date and time string for main display
if coord_comms.is_linked() then
iocontrol.get_db().facility.ps.publish("date_time", os.date(date_format))
end
loop_clock.start()
elseif conn_watchdog.is_timer(param1) then
-- supervisor watchdog timeout
local msg = "supervisor server timeout"
log_comms(msg)
println_ts(msg)
log_comms("supervisor server timeout")
-- close connection, UI, and stop sounder
-- close connection, main UI, and stop sounder
coord_comms.close()
renderer.close_ui()
sounder.stop()
if nic.connected() then
-- try to re-connect to the supervisor
if not init_connect_sv() then break end
ui_ok = init_start_ui()
end
else
-- a non-clock/main watchdog timer event
@ -377,22 +334,15 @@ local function main()
elseif event == "modem_message" then
-- got a packet
local packet = coord_comms.parse_packet(param1, param2, param3, param4, param5)
coord_comms.handle_packet(packet)
-- check if it was a disconnect
if not coord_comms.is_linked() then
-- handle then check if it was a disconnect
if coord_comms.handle_packet(packet) then
log_comms("supervisor closed connection")
-- close connection, UI, and stop sounder
coord_comms.close()
renderer.close_ui()
sounder.stop()
if nic.connected() then
-- try to re-connect to the supervisor
if not init_connect_sv() then break end
ui_ok = init_start_ui()
end
end
elseif event == "monitor_touch" or event == "mouse_click" or event == "mouse_up" or
event == "mouse_drag" or event == "mouse_scroll" then
@ -405,10 +355,17 @@ local function main()
-- check for termination request
if event == "terminate" or ppm.should_terminate() then
println_ts("terminate requested, closing connections...")
-- handle supervisor connection
link_failed = coord_comms.try_connect(true)
if coord_comms.is_linked() then
log_comms("terminate requested, closing supervisor connection...")
else link_failed = true end
coord_comms.close()
log_comms("supervisor connection closed")
-- handle API sessions
log_comms("closing api sessions...")
apisessions.close_all()
log_comms("api sessions closed")
@ -421,6 +378,9 @@ local function main()
sounder.stop()
log_sys("system shutdown")
if link_failed then println_ts("failed to connect to supervisor") end
if not ui_ok then println_ts("main UI creation failed") end
println_ts("exited")
log.info("exited")
end