23HealthCheck::HealthCheck()
30 last_health_check_ns_ = _apps_time_monotonic_ns();
38HealthCheck::~HealthCheck()
51 std::lock_guard<std::mutex> lock(health_mutex_);
55 std::cerr <<
"HealthCheck already running" << std::endl;
59 running_.store(
true, std::memory_order_release);
60 health_thread_ = std::thread(&HealthCheck::healthCheckLoop,
this);
61 health_thread_.detach();
63 std::cout <<
"HealthCheck started - monitoring at 30Hz" << std::endl;
73 std::lock_guard<std::mutex> lock(health_mutex_);
80 running_.store(
false, std::memory_order_release);
83 std::this_thread::sleep_for(std::chrono::milliseconds(100));
85 std::cout <<
"HealthCheck stopped" << std::endl;
97void HealthCheck::healthCheckLoop()
99 const int64_t health_check_period_ns = 33333333;
103 auto start_time = _apps_time_monotonic_ns();
105 checkSystemConnectivity();
111 std::cerr <<
"[HEALTH] ERROR: IMU disconnected; publishing blank VIO data" << std::endl;
113 std::cerr <<
"[HEALTH] ERROR: Camera disconnected; publishing blank VIO data" << std::endl;
114 std::cout <<
"[HEALTH] Publishing blank VIO packet due to missing sensors" << std::endl;
120 monitorSystemPerformance();
121 checkAutoResetConditions();
122 checkVINSResetRequest();
126 health_check_count_++;
127 last_health_check_ns_ = start_time;
130 int64_t elapsed_ns = _apps_time_monotonic_ns() - start_time;
131 int64_t sleep_ns = std::max<int64_t>(0, health_check_period_ns - elapsed_ns);
135 std::this_thread::sleep_for(std::chrono::nanoseconds(sleep_ns));
146void HealthCheck::analyzeErrorCodes()
151 uint32_t new_errors = current_error_codes & ~last_error_codes_;
152 uint32_t cleared_errors = last_error_codes_ & ~current_error_codes;
156 std::cerr <<
"[HEALTH] New errors detected: 0x" << std::hex << (int)new_errors << std::dec << std::endl;
157 printf(
"[DEBUG] Current error codes: 0x%x, New errors: 0x%x\n", (
int)current_error_codes, (
int)new_errors);
160 if (new_errors & ERROR_CODE_COVARIANCE)
162 std::cerr <<
"[HEALTH] ERROR: Covariance matrix not positive definite" << std::endl;
164 if (new_errors & ERROR_CODE_IMU_OOB)
166 std::cerr <<
"[HEALTH] ERROR: IMU exceeded range (out of bounds)" << std::endl;
168 if (new_errors & ERROR_CODE_IMU_BW)
170 std::cerr <<
"[HEALTH] ERROR: IMU bandwidth too low" << std::endl;
172 if (new_errors & ERROR_CODE_NOT_STATIONARY)
174 std::cerr <<
"[HEALTH] ERROR: System not stationary at initialization" << std::endl;
176 if (new_errors & ERROR_CODE_NO_FEATURES)
178 std::cerr <<
"[HEALTH] ERROR: No features for extended period" << std::endl;
180 if (new_errors & ERROR_CODE_CONSTRAINT)
182 std::cerr <<
"[HEALTH] ERROR: Insufficient constraints from features" << std::endl;
184 if (new_errors & ERROR_CODE_FEATURE_ADD)
186 std::cerr <<
"[HEALTH] ERROR: Failed to add new features" << std::endl;
188 if (new_errors & ERROR_CODE_VEL_INST_CERT)
190 std::cerr <<
"[HEALTH] ERROR: Exceeded instant velocity uncertainty" << std::endl;
192 if (new_errors & ERROR_CODE_VEL_WINDOW_CERT)
194 std::cerr <<
"[HEALTH] ERROR: Exceeded velocity uncertainty" << std::endl;
196 if (new_errors & ERROR_CODE_DROPPED_IMU)
198 std::cerr <<
"[HEALTH] WARNING: Dropped IMU samples" << std::endl;
200 if (new_errors & ERROR_CODE_BAD_CAM_CAL)
202 std::cerr <<
"[HEALTH] ERROR: Intrinsic camera calibration questionable" << std::endl;
204 if (new_errors & ERROR_CODE_LOW_FEATURES)
206 std::cerr <<
"[HEALTH] ERROR: Insufficient good features to initialize" << std::endl;
208 if (new_errors & ERROR_CODE_DROPPED_CAM)
210 std::cerr <<
"[HEALTH] WARNING: Dropped camera frame" << std::endl;
212 if (new_errors & ERROR_CODE_DROPPED_GPS_VEL)
214 std::cerr <<
"[HEALTH] WARNING: Dropped GPS velocity sample" << std::endl;
216 if (new_errors & ERROR_CODE_BAD_TIMESTAMP)
218 std::cerr <<
"[HEALTH] ERROR: Sensor measurements with bad timestamps" << std::endl;
219 printf(
"[DEBUG] Health check detected ERROR_CODE_BAD_TIMESTAMP\n");
221 if (new_errors & ERROR_CODE_IMU_MISSING)
223 std::cerr <<
"[HEALTH] ERROR: Missing IMU data" << std::endl;
225 if (new_errors & ERROR_CODE_CAM_MISSING)
227 std::cerr <<
"[HEALTH] ERROR: Missing camera frames" << std::endl;
229 if (new_errors & ERROR_CODE_CAM_BAD_RES)
231 std::cerr <<
"[HEALTH] ERROR: Camera resolution unsupported" << std::endl;
233 if (new_errors & ERROR_CODE_CAM_BAD_FORMAT)
235 std::cerr <<
"[HEALTH] ERROR: Camera format unsupported" << std::endl;
237 if (new_errors & ERROR_CODE_UNKNOWN)
239 std::cerr <<
"[HEALTH] ERROR: Unknown error" << std::endl;
241 if (new_errors & ERROR_CODE_STALLED)
243 std::cerr <<
"[HEALTH] ERROR: Frame processing stalled" << std::endl;
247 if (cleared_errors != 0)
249 std::cout <<
"[HEALTH] Errors cleared: 0x" << std::hex << (int)cleared_errors << std::dec << std::endl;
252 last_error_codes_ = current_error_codes;
261void HealthCheck::checkSystemConnectivity()
265 const int64_t sensor_timeout_ns = 5000000000;
266 int64_t now_ns = _apps_time_monotonic_ns();
272 std::cerr <<
"[HEALTH] IMU likely disconnected --> stale data (no data for "
282 std::cerr <<
"[HEALTH] Camera likely disconnected --> stale data (no data for "
288 bool current_imu_connected =
is_imu_connected.load(std::memory_order_acquire);
289 bool current_cam_connected =
is_cam_connected.load(std::memory_order_acquire);
292 if (current_imu_connected != last_imu_connected_)
294 if (current_imu_connected)
296 std::cout <<
"[HEALTH] IMU connected" << std::endl;
301 std::cout <<
"[HEALTH] Reset requested due to IMU reconnection" << std::endl;
305 std::cerr <<
"[HEALTH] ERROR: IMU disconnected" << std::endl;
308 last_imu_connected_ = current_imu_connected;
312 if (current_cam_connected != last_cam_connected_)
314 if (current_cam_connected)
316 std::cout <<
"[HEALTH] Camera connected" << std::endl;
322 if (first_camera_connection_seen_) {
325 std::cout <<
"[HEALTH] Reset requested due to camera reconnection" << std::endl;
327 first_camera_connection_seen_ =
true;
332 std::cerr <<
"[HEALTH] ERROR: Camera disconnected" << std::endl;
335 last_cam_connected_ = current_cam_connected;
339 uint8_t current_vio_state =
vio_state.load(std::memory_order_acquire);
340 if (current_vio_state != last_vio_state_)
342 std::cout <<
"[HEALTH] VIO state changed: " << (int)last_vio_state_ <<
" -> " << (
int)current_vio_state << std::endl;
343 last_vio_state_ = current_vio_state;
353void HealthCheck::monitorSystemPerformance()
355 static int64_t last_performance_log_ns = 0;
356 int64_t current_time_ns = _apps_time_monotonic_ns();
359 if (current_time_ns - last_performance_log_ns > 5000000000)
361 std::cout <<
"[HEALTH] Performance - Health checks: " << health_check_count_
365 last_performance_log_ns = current_time_ns;
366 health_check_count_ = 0;
376void HealthCheck::checkAutoResetConditions()
384 int64_t now = _apps_time_monotonic_ns();
385 if (now - time_of_last_reset < INIT_FAILURE_TIMEOUT_NS)
400 if (current_error_codes != 0)
402 std::cerr <<
"[HEALTH] AUTO-RESET RECOMMENDED: Error code(s) detected: 0x" << std::hex << (int)current_error_codes << std::dec << std::endl;
415void HealthCheck::checkVINSResetRequest()
422 int64_t current_time = _apps_time_monotonic_ns();
423 uint64_t time_since_reset = current_time - time_of_last_reset;
424 if (time_since_reset <= INIT_FAILURE_TIMEOUT_NS)
426 std::cout <<
"[HEALTH] Reset requested but last reset was too recent ("
427 << (time_since_reset / 1000000) <<
"ms ago), ignoring request" << std::endl;
432 if (
is_resetting.exchange(
true, std::memory_order_acq_rel))
434 std::cout <<
"[HEALTH] Reset already in progress, ignoring request\n";
439 std::cout <<
"[HEALTH] Reset requested, preparing to reset VIO system" << std::endl;
447 catch (
const std::exception &e)
449 fprintf(stderr,
"[ERROR] Exception during reset: %s\n", e.what());
451 if (strstr(e.what(),
"Operation not permitted") !=
nullptr)
453 fprintf(stderr,
"[ERROR] Permission denied during reset - this may be due to insufficient privileges\n");
460 std::cout <<
"[HEALTH] VIO system reset successfully" << std::endl;
463 vio_state.store(VIO_STATE_INITIALIZING, std::memory_order_release);
464 std::cout <<
"[HEALTH] VIO state set to INITIALIZING after reset" << std::endl;
472 std::cerr <<
"[HEALTH] VIO system reset failed with code: " << rc << std::endl;
474 vio_state.store(VIO_STATE_INITIALIZING, std::memory_order_release);
479 time_of_last_reset = _apps_time_monotonic_ns();
486int HealthCheck::doHardReset()
490 std::unique_lock<std::mutex> lk(
reset_mtx);
492 bool wait_result =
reset_cv.wait_for(lk, std::chrono::seconds(5),
501 fprintf(stderr,
"[ERROR] Timeout waiting for callbacks to finish during reset. active_callbacks=%d\n",
508 printf(
"[HEALTH] Hard reset in progress\n");
514 std::cout <<
"[HEALTH] VIO manager was uninitialized, creating a fresh instance" << std::endl;
520 catch (
const std::exception &e)
522 fprintf(stderr,
"[ERROR] Failed to create VIO manager during reset: %s\n", e.what());
530 std::unique_ptr<ov_msckf::VioManager> old_vio_manager;
531 std::unique_ptr<ov_msckf::VioManager> new_vio_manager;
537 if (!new_vio_manager)
539 fprintf(stderr,
"[ERROR] Failed to create new VIO manager object\n");
540 throw std::runtime_error(
"Failed to create new VIO manager");
546 catch (
const std::exception &e)
548 fprintf(stderr,
"[ERROR] Exception during VIO manager creation: %s\n", e.what());
555 std::cerr <<
"[HEALTH] Warning: no previous VIO manager to restore" << std::endl;
561 old_vio_manager.reset();
583 uint32_t current_errors =
vio_error_codes.load(std::memory_order_acquire);
584 uint32_t new_errors = current_errors & ~error_mask;
590 std::cout <<
"[HEALTH] Cleared error codes: 0x" << std::hex << (int)error_mask << std::dec << std::endl;
Housekeeping and data publishing for VOXL OpenVINS.
volatile int64_t last_cam_time
Timestamp of last camera data (nanoseconds)
volatile int64_t last_imu_timestamp_ns
Timestamp of last IMU data (nanoseconds)
volatile int main_running
Main process running flag.
std::atomic< uint32_t > active_callbacks
Number of callbacks inside the system.
ov_msckf::VioManagerOptions vio_manager_options
VIO manager options.
int en_auto_reset
Enable automatic reset functionality.
std::mutex reset_mtx
Mutex used by reset thread.
std::atomic< uint32_t > reset_num_counter
Counter which increments on resets.
std::unique_ptr< ov_msckf::VioManager > vio_manager
Main VIO manager instance.
int en_debug
Enable debug output.
std::condition_variable reset_cv
Reset condition variable.
std::atomic< bool > is_resetting
VIO reset state flag.
std::atomic< uint8_t > vio_state
Current VIO system state.
std::atomic< uint32_t > vio_error_codes
VIO error codes.
std::atomic< bool > is_imu_connected
IMU connection state.
std::atomic< bool > reset_requested
Should reset flag.
std::atomic< bool > is_cam_connected
Camera connection state.
void start()
Start the health check system.
static void clearErrorCodes(uint32_t error_mask)
Clear specific error codes.
void stop()
Stop the health check system.
void set_first_packet(bool first_packet_)
Set the first packet flag.
static Publisher & getInstance()
Get singleton instance.
Main namespace for VOXL OpenVINS server components.