Configuration
This file explains the available configuration options, using a KITTI experiment as an example, and lists the possible choices in the current framework. Some configurations belong to our Experiment Version only; they are unused (and commented out) in this Release Version.
Notes:
None: when the choice is None, leave the option blank. DON'T write 'None', which YAML would parse as the string 'None'.
# ------------------------------------
# Basic setup
# ------------------------------------
dataset: kitti_odom # dataset [kitti_odom, kitti_raw, tum-1/2/3, adelaide1/2]
seed: 4869 # random seed
image:
height: 192 # image height
width: 640 # image width
ext: jpg # image file extension for data loading
seq: "10" # sequence to run
frame_step: 1 # frame step
# ------------------------------------
# Directories
# ------------------------------------
directory:
result_dir: {RESULT_DIR} # directory to save result
img_seq_dir: {IMAGE_DATA_DIR} # image data directory
gt_pose_dir: {GT_POSE_DIR} # (optional) ground-truth pose data directory
depth_dir: {DEPTH_DATA_DIR} # (optional) external depth data, e.g. ground-truth depths
# ------------------------------------
# Depth
# ------------------------------------
depth: # Depth configuration
depth_src: # depth source [None, gt]
# None - depth model prediction
# gt - use ground truth depth
deep_depth:
network: monodepth2 # depth network
pretrained_model: {MODEL_DIR} # directory stores depth.pth and encoder.pth
max_depth: 50 # maximum depth
min_depth: 0 # minimum depth
# ------------------------------------
# Deep flow
# ------------------------------------
deep_flow: # Deep optical flow configuration
network: liteflow # optical flow network, [liteflow]
flow_net_weight: {FLOW_MODEL} # optical flow model path
forward_backward: True # predict both forward/backward flows and compute forward-backward flow consistency
# ------------------------------------
# Deep Pose (Experiment Ver. only)
# ------------------------------------
deep_pose: # Deep pose network configuration
enable: False # enable/disable pose network
pretrained_model: {MODEL_DIR} # model directory contains pose_encoder.pth and pose.pth
# ------------------------------------
# Online Finetuning
# ------------------------------------
online_finetune: # online fine-tuning configuration
enable: False # enable/disable flow finetuning
lr: 0.00001 # learning rate
num_frames: # number of frames to be fine-tuned, [None, int]
flow: # flow fine-tuning configuration
enable: False # enable/disable flow finetuning
scales: [1, 2, 3, 4, 5] # scales to be used for training
loss: # flow loss configuration
flow_consistency: 0.005 # forward-backward flow consistency loss weight
flow_smoothness: 0.1 # flow smoothness loss weight
depth: # depth fine-tuning configuration
enable: False # enable/disable depth finetuning
scales: [0, 1, 2, 3] # scales to be used for training
pose_src: DF-VO # pose source for depth-pose finetuning [DF-VO, deep_pose]
loss: # depth loss configuration
apperance_loss: 1 # appearance loss weight
disparity_smoothness: 0.001 # disparity smoothness loss weight
depth_consistency: 0.001 # depth consistency loss weight
pose: # pose finetuning configuration (with depth)
enable: False # enable/disable pose finetuning
# ------------------------------------
# Preprocessing
# ------------------------------------
crop: # cropping configuration
depth_crop: [[0.3, 1], [0, 1]] # depth map cropping, format: [[y0, y1],[x0, x1]]
flow_crop: [[0, 1], [0, 1]] # optical flow map cropping, format: [[y0, y1],[x0, x1]]
# ------------------------------------
# Correspondence (keypoint) selection
# ------------------------------------
kp_selection: # correspondence selection configuration
local_bestN: # local best-N configuration
enable: True # enable/disable local best-N selection
num_bestN: 2000 # number of keypoints
num_row: 10 # number of divided rows
num_col: 10 # number of divided columns
score_method: flow # selection score, [flow, flow_ratio]
# flow: L2 distance of forward-backward flow
# flow_ratio: relative flow difference ratio
thre: 0.1 # flow consistency masking threshold
bestN:
enable: False # enable/disable best-N selection
num_bestN: 2000 # number of keypoints
sampled_kp: # random/uniform keypoint sampling
enable: False # enable/disable random/uniform keypoint sampling
num_kp: 2000 # number of keypoints
rigid_flow_kp: # keypoint selection from optical-rigid flow consistency (for scale recovery)
enable: False # enable/disable rigid-flow based keypoint selection
num_bestN: 2000 # number of keypoints
num_row: 10 # number of divided rows
num_col: 10 # number of divided columns
score_method: flow # selection score, [flow]
rigid_flow_thre: 3 # masking threshold for rigid-optical flow consistency
optical_flow_thre: 0.1 # masking threshold for forward-backward flow consistency
depth_consistency: # (Experiment Ver. only) depth consistency configuration
enable: False # enable/disable depth consistency
thre: 0.05 # masking threshold
# ------------------------------------
# Tracking
# ------------------------------------
tracking_method: hybrid # tracking method [hybrid, PnP, deep_pose]
# hybrid - E-tracker + PnP-tracker;
# PnP - PnP-tracker
# deep_pose - pose_cnn-tracker
e_tracker: # E-tracker configuration
ransac: # Ransac configuration
reproj_thre: 0.2 # inlier threshold value
repeat: 5 # number of repeated Ransac
validity: # model selection condition
method: GRIC # method of validating E-tracker, [flow, GRIC]
thre: # threshold value for model selection, only used in [flow]
kp_src: kp_best # type of correspondences to be used [kp_list, kp_best]
# kp_list - uniformly sampled keypoints
# kp_best - keypoints sampled from best-N / local best method
scale_recovery: # scale recovery configuration
method: simple # scale recovery method [simple, iterative]
ransac: # Ransac configuration
method: depth_ratio # fitting target [depth_ratio, abs_diff]
# depth_ratio: find a scale s.t. most triangulated_depth/cnn_depth close to 1
# abs_diff: find a scale s.t. abs(triangulated_depth - cnn_depth) close to 0
min_samples: 3 # minimum number of min_samples
max_trials: 100 # maximum number of trials
stop_prob: 0.99 # The probability that the algorithm produces a useful result
thre: 0.1 # inlier threshold value
kp_src: kp_best # type of correspondences to be used [kp_list, kp_best, kp_depth]
# kp_list - uniformly sampled keypoints
# kp_best - keypoints sampled from best-N / local best method
# kp_depth - keypoints sampled after optical-rigid flow consistency masking
pnp_tracker: # PnP-tracker configuration
ransac: # Ransac configuration
iter: 100 # number of iteration
reproj_thre: 1 # inlier threshold value
repeat: 5 # number of repeated Ransac
kp_src: kp_best # type of correspondences to be used [kp_list, kp_best, kp_depth]
# kp_list - uniformly sampled keypoints
# kp_best - keypoints sampled from best-N / local best method
# kp_depth - keypoints sampled after optical-rigid flow consistency masking
# ------------------------------------
# Visualization
# ------------------------------------
visualization: # visualization configuration
enable: True # enable/disable frame drawer
save_img: True # enable/disable save frames
window_h: 900 # frame window height
window_w: 1500 # frame window width
kp_src: kp_best # type of correspondences to be drawn
flow: # optical flow visualization configuration
vis_forward_flow: True # enable/disable forward flow visualization
vis_backward_flow: True # enable/disable backward flow visualization
vis_flow_diff: True # enable/disable forward-backward flow consistency visualization
vis_rigid_diff: False # enable/disable optical-rigid flow consistency visualization
kp_match: # keypoint matching visualization
kp_num: 100 # number of selected keypoints to be visualized
vis_temp: # keypoint matching in temporal
enable: True # enable/disable visualization
vis_side: # keypoint matching side-by-side
enable: True # enable/disable visualization
inlier_plot: False # enable/disable inlier plot
trajectory: # trajectory visualization configuration
vis_traj: True # enable/disable predicted trajectory visualization
vis_gt_traj: False # enable/disable ground truth trajectory visualization
mono_scale: 1 # scaling factor to align with gt (if gt is available)
depth: # depth visualization configuration
use_tracking_depth: False # enable/disable visualizing depth map used for tracking (preprocessed, e.g. range capping)
depth_disp: disp # visualize depth or disparity map [depth, disp]