// The open source OpenXR runtime.
1// Copyright 2019-2020, Collabora, Ltd.
2// SPDX-License-Identifier: BSL-1.0
3/*!
4 * @file
5 * @brief PS Move tracker code.
6 * @author Pete Black <pblack@collabora.com>
7 * @author Jakob Bornecrantz <jakob@collabora.com>
8 * @author Rylie Pavlik <rylie.pavlik@collabora.com>
9 * @ingroup aux_tracking
10 */
11
12#include "xrt/xrt_tracking.h"
13
14#include "tracking/t_tracking.h"
15#include "tracking/t_calibration_opencv.hpp"
16#include "tracking/t_tracker_psmv_fusion.hpp"
17#include "tracking/t_helper_debug_sink.hpp"
18
19#include "util/u_var.h"
20#include "util/u_misc.h"
21#include "util/u_debug.h"
22#include "util/u_frame.h"
23#include "util/u_format.h"
24#include "util/u_trace_marker.h"
25
26#include "math/m_api.h"
27
28#include "os/os_threading.h"
29
30#include <stdio.h>
31#include <assert.h>
32#include <pthread.h>
33#include <type_traits>
34
35
36using namespace xrt::auxiliary::tracking;
37
38//! Namespace for PS Move tracking implementation
39namespace xrt::auxiliary::tracking::psmv {
40
41/*!
42 * Single camera.
43 *
44 * @see TrackerPSMV
45 */
46struct View
47{
48public:
49 cv::Mat undistort_rectify_map_x;
50 cv::Mat undistort_rectify_map_y;
51
52 cv::Matx33d intrinsics;
53 cv::Mat distortion; // size may vary
54 enum t_camera_distortion_model distortion_model;
55
56 std::vector<cv::KeyPoint> keypoints;
57
58 cv::Mat frame_undist_rectified;
59
60 void
61 populate_from_calib(t_camera_calibration &calib, const RemapPair &rectification)
62 {
63 CameraCalibrationWrapper wrap(calib);
64 intrinsics = wrap.intrinsics_mat;
65 distortion = wrap.distortion_mat.clone();
66 distortion_model = wrap.distortion_model;
67
68 undistort_rectify_map_x = rectification.remap_x;
69 undistort_rectify_map_y = rectification.remap_y;
70 }
71};
72
73// Has to be standard layout because is embedded in TrackerPSMV.
74static_assert(std::is_standard_layout<View>::value);
75
76/*!
77 * The core object of the PS Move tracking setup.
78 *
79 * @implements xrt_tracked_psmv
80 * @implements xrt_frame_sink
81 * @implements xrt_frame_node
82 */
83struct TrackerPSMV
84{
85public:
86 struct xrt_tracked_psmv base = {};
87 struct xrt_frame_sink sink = {};
88 struct xrt_frame_node node = {};
89
90 //! Frame waiting to be processed.
91 struct xrt_frame *frame;
92
93 //! Thread and lock helper.
94 struct os_thread_helper oth;
95
96 bool tracked = false;
97
98 HelperDebugSink debug = {HelperDebugSink::AllAvailable};
99
100 //! Have we received a new IMU sample.
101 bool has_imu = false;
102
103 struct
104 {
105 struct xrt_vec3 pos = {};
106 struct xrt_quat rot = {};
107 } fusion;
108
109 View view[2];
110
111 bool calibrated;
112
113 cv::Mat disparity_to_depth;
114 cv::Vec3d r_cam_translation;
115 cv::Matx33d r_cam_rotation;
116
117 cv::Ptr<cv::SimpleBlobDetector> sbd;
118
119 std::shared_ptr<PSMVFusionInterface> filter;
120
121 xrt_vec3 tracked_object_position;
122};
123
124// Has to be standard layout because of first element casts we do.
125static_assert(std::is_standard_layout<TrackerPSMV>::value);
126
127/*!
128 * @brief Perform per-view (two in a stereo camera image) processing on an
129 * image, before tracking math is performed.
130 *
131 * Right now, this is mainly finding blobs/keypoints.
132 */
133static void
134do_view(TrackerPSMV &t, View &view, cv::Mat &grey, cv::Mat &rgb)
135{
136 XRT_TRACE_MARKER();
137
138 {
139 XRT_TRACE_IDENT(remap);
140
141 // Undistort and rectify the whole image.
142 cv::remap(grey, // src
143 view.frame_undist_rectified, // dst
144 view.undistort_rectify_map_x, // map1
145 view.undistort_rectify_map_y, // map2
146 cv::INTER_NEAREST, // interpolation
147 cv::BORDER_CONSTANT, // borderMode
148 cv::Scalar(0, 0, 0)); // borderValue
149 }
150
151 {
152 XRT_TRACE_IDENT(threshold);
153
154 cv::threshold(view.frame_undist_rectified, // src
155 view.frame_undist_rectified, // dst
156 32.0, // thresh
157 255.0, // maxval
158 0); // type
159 }
160
161 {
162 XRT_TRACE_IDENT(detect);
163
164 // Do blob detection with our masks.
165 //! @todo Re-enable masks.
166 t.sbd->detect(view.frame_undist_rectified, // image
167 view.keypoints, // keypoints
168 cv::noArray()); // mask
169 }
170
171
172 // Debug is wanted, draw the keypoints.
173 if (rgb.cols > 0) {
174 cv::drawKeypoints(view.frame_undist_rectified, // image
175 view.keypoints, // keypoints
176 rgb, // outImage
177 cv::Scalar(255, 0, 0), // color
178 cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS); // flags
179 }
180}
181
182/*!
183 * @brief Helper struct that keeps the value that produces the lowest "score" as
184 * computed by your functor.
185 *
186 * Having this as a struct with a method, instead of a single "algorithm"-style
187 * function, lets you keep your complicated filtering logic in your own
188 * loop, calling in when you have a new candidate for "best".
189 *
190 * @note Create by calling make_lowest_score_finder() with your
191 * function/lambda that takes an element and returns the score, to deduce the
192 * un-spellable typename of the lambda.
193 *
194 * @tparam ValueType The type of a single element value - whatever you want to
195 * assign a score to.
196 * @tparam FunctionType The type of your functor/lambda that turns a ValueType
197 * into a float "score". Usually deduced.
198 */
199template <typename ValueType, typename FunctionType> struct FindLowestScore
200{
201 const FunctionType score_functor;
202 bool got_one{false};
203 ValueType best{};
204 float best_score{0};
205
206 void
207 handle_candidate(ValueType val)
208 {
209 float score = score_functor(val);
210 if (!got_one || score < best_score) {
211 best = val;
212 best_score = score;
213 got_one = true;
214 }
215 }
216};
217
218
219//! Factory function for FindLowestScore to deduce the functor type.
220template <typename ValueType, typename FunctionType>
221static FindLowestScore<ValueType, FunctionType>
222make_lowest_score_finder(FunctionType scoreFunctor)
223{
224 return FindLowestScore<ValueType, FunctionType>{scoreFunctor};
225}
226
227//! Convert our 2d point + disparities into 3d points.
228static cv::Point3f
229world_point_from_blobs(const cv::Point2f &left, const cv::Point2f &right, const cv::Matx44d &disparity_to_depth)
230{
231 float disp = left.x - right.x;
232 cv::Vec4d xydw(left.x, left.y, disp, 1.0f);
233
234 // Transform
235 cv::Vec4d h_world = disparity_to_depth * xydw;
236
237 // Divide by scale to get 3D vector from homogeneous coordinate.
238 cv::Point3f world_point( //
239 h_world[0] / h_world[3], //
240 h_world[1] / h_world[3], //
241 h_world[2] / h_world[3]); //
242
243 /*
244 * OpenCV camera space is right handed, -Y up, +Z forwards but
245 * Monados camera space is right handed, +Y up, -Z forwards so we need
246 * to invert y and z.
247 */
248 world_point.y = -world_point.y;
249 world_point.z = -world_point.z;
250
251 return world_point;
252}
253
254/*!
255 * @brief Perform tracking computations on a frame of video data.
256 */
257static void
258process(TrackerPSMV &t, struct xrt_frame *xf)
259{
260 XRT_TRACE_MARKER();
261
262 // Only IMU data: nothing to do
263 if (xf == NULL) {
264 return;
265 }
266
267 // Wrong type of frame: unreference and return?
268 if (xf->format != XRT_FORMAT_L8) {
269 xrt_frame_reference(&xf, NULL);
270 return;
271 }
272
273 if (!t.calibrated) {
274 return;
275 }
276
277 // Create the debug frame if needed.
278 t.debug.refresh(xf);
279
280 t.view[0].keypoints.clear();
281 t.view[1].keypoints.clear();
282
283 int cols = xf->width / 2;
284 int rows = xf->height;
285 int stride = xf->stride;
286
287 cv::Mat l_grey(rows, cols, CV_8UC1, xf->data, stride);
288 cv::Mat r_grey(rows, cols, CV_8UC1, xf->data + cols, stride);
289
290 do_view(t, t.view[0], l_grey, t.debug.rgb[0]);
291 do_view(t, t.view[1], r_grey, t.debug.rgb[1]);
292
293 cv::Point3f last_point(t.tracked_object_position.x, t.tracked_object_position.y, t.tracked_object_position.z);
294 auto nearest_world = make_lowest_score_finder<cv::Point3f>([&](const cv::Point3f &world_point) {
295 //! @todo don't really need the square root to be done here.
296 return cv::norm(world_point - last_point);
297 });
298 // do some basic matching to come up with likely disparity-pairs.
299
300 const cv::Matx44d disparity_to_depth = static_cast<cv::Matx44d>(t.disparity_to_depth);
301
302 for (const cv::KeyPoint &l_keypoint : t.view[0].keypoints) {
303 cv::Point2f l_blob = l_keypoint.pt;
304
305 auto nearest_blob = make_lowest_score_finder<cv::Point2f>(
306 [&](const cv::Point2f &r_blob) { return l_blob.x - r_blob.x; });
307
308 for (const cv::KeyPoint &r_keypoint : t.view[1].keypoints) {
309 cv::Point2f r_blob = r_keypoint.pt;
310 // find closest point on same-ish scanline
311 if ((l_blob.y < r_blob.y + 3) && (l_blob.y > r_blob.y - 3)) {
312 nearest_blob.handle_candidate(r_blob);
313 }
314 }
315 //! @todo do we need to avoid claiming the same counterpart
316 //! several times?
317 if (nearest_blob.got_one) {
318 cv::Point3f pt = world_point_from_blobs(l_blob, nearest_blob.best, disparity_to_depth);
319 nearest_world.handle_candidate(pt);
320 }
321 }
322
323 if (nearest_world.got_one) {
324 cv::Point3f world_point = nearest_world.best;
325 // update internal state
326 memcpy(&t.tracked_object_position, &world_point.x, sizeof(t.tracked_object_position));
327 } else {
328 t.filter->clear_position_tracked_flag();
329 }
330
331 // We are done with the debug frame.
332 t.debug.submit();
333
334 // We are done with the frame.
335 xrt_frame_reference(&xf, NULL);
336
337 if (nearest_world.got_one) {
338#if 0
339 //! @todo something less arbitrary for the lever arm?
340 //! This puts the origin approximately under the PS
341 //! button.
342 xrt_vec3 lever_arm{0.f, 0.09f, 0.f};
343 //! @todo this should depend on distance
344 // Weirdly, this is where *not* applying the
345 // disparity-to-distance/rectification/etc would
346 // simplify things, since the measurement variance is
347 // related to the image sensor. 1.e-4 means 1cm std dev.
348 // Not sure how to estimate the depth variance without
349 // some research.
350 xrt_vec3 variance{1.e-4f, 1.e-4f, 4.e-4f};
351#endif
352 t.filter->process_3d_vision_data(0, &t.tracked_object_position, NULL, NULL,
353 //! @todo tune cutoff for residual arbitrarily "too large"
354 15);
355 } else {
356 t.filter->clear_position_tracked_flag();
357 }
358}
359
360/*!
361 * @brief Tracker processing thread function
362 */
363static void
364run(TrackerPSMV &t)
365{
366 U_TRACE_SET_THREAD_NAME("PSMV");
367
368 struct xrt_frame *frame = NULL;
369
370 os_thread_helper_lock(&t.oth);
371
372 while (os_thread_helper_is_running_locked(&t.oth)) {
373
374 // No data
375 if (!t.has_imu && t.frame == NULL) {
376 os_thread_helper_wait_locked(&t.oth);
377
378 /*
379 * Loop back to the top to check if we should stop,
380 * also handles spurious wakeups by re-checking the
381 * condition in the if case. Essentially two loops.
382 */
383 continue;
384 }
385
386 // Take a reference on the current frame, this keeps it alive
387 // if it is replaced during the consumer processing it, but
388 // we no longer need to hold onto the frame on the queue we
389 // just move the pointer.
390 frame = t.frame;
391 t.frame = NULL;
392
393 // Unlock the mutex when we do the work.
394 os_thread_helper_unlock(&t.oth);
395
396 process(t, frame);
397
398 // Have to lock it again.
399 os_thread_helper_lock(&t.oth);
400 }
401
402 os_thread_helper_unlock(&t.oth);
403}
404
405/*!
406 * @brief Retrieves a pose from the filter.
407 */
408static void
409get_pose(TrackerPSMV &t, enum xrt_input_name name, timepoint_ns when_ns, struct xrt_space_relation *out_relation)
410{
411 os_thread_helper_lock(&t.oth);
412
413 // Don't do anything if we have stopped.
414 if (!os_thread_helper_is_running_locked(&t.oth)) {
415 os_thread_helper_unlock(&t.oth);
416 return;
417 }
418
419 if (name == XRT_INPUT_PSMV_BALL_CENTER_POSE) {
420 out_relation->pose.position = t.tracked_object_position;
421 out_relation->pose.orientation.x = 0.0f;
422 out_relation->pose.orientation.y = 0.0f;
423 out_relation->pose.orientation.z = 0.0f;
424 out_relation->pose.orientation.w = 1.0f;
425
426 out_relation->relation_flags = (enum xrt_space_relation_flags)(XRT_SPACE_RELATION_POSITION_VALID_BIT |
427 XRT_SPACE_RELATION_POSITION_TRACKED_BIT);
428
429 os_thread_helper_unlock(&t.oth);
430 return;
431 }
432
433 t.filter->get_prediction(when_ns, out_relation);
434
435 os_thread_helper_unlock(&t.oth);
436}
437
438static void
439imu_data(TrackerPSMV &t, timepoint_ns timestamp_ns, struct xrt_tracking_sample *sample)
440{
441 os_thread_helper_lock(&t.oth);
442
443 // Don't do anything if we have stopped.
444 if (!os_thread_helper_is_running_locked(&t.oth)) {
445 os_thread_helper_unlock(&t.oth);
446 return;
447 }
448 t.filter->process_imu_data(timestamp_ns, sample, NULL);
449
450 os_thread_helper_unlock(&t.oth);
451}
452
453static void
454frame(TrackerPSMV &t, struct xrt_frame *xf)
455{
456 os_thread_helper_lock(&t.oth);
457
458 // Don't do anything if we have stopped.
459 if (!os_thread_helper_is_running_locked(&t.oth)) {
460 os_thread_helper_unlock(&t.oth);
461 return;
462 }
463
464 xrt_frame_reference(&t.frame, xf);
465 // Wake up the thread.
466 os_thread_helper_signal_locked(&t.oth);
467
468 os_thread_helper_unlock(&t.oth);
469}
470
/*!
 * Stop the processing thread and wait for it to exit.
 *
 * Called from the frame node's break_apart stage, before destruction.
 */
static void
break_apart(TrackerPSMV &t)
{
	os_thread_helper_stop_and_wait(&t.oth);
}
476
477} // namespace xrt::auxiliary::tracking::psmv
478
479using xrt::auxiliary::tracking::psmv::TrackerPSMV;
480
481/*
482 *
483 * C wrapper functions.
484 *
485 */
486
487extern "C" void
488t_psmv_push_imu(struct xrt_tracked_psmv *xtmv, timepoint_ns timestamp_ns, struct xrt_tracking_sample *sample)
489{
490 auto &t = *container_of(xtmv, TrackerPSMV, base);
491 imu_data(t, timestamp_ns, sample);
492}
493
494extern "C" void
495t_psmv_get_tracked_pose(struct xrt_tracked_psmv *xtmv,
496 enum xrt_input_name name,
497 timepoint_ns when_ns,
498 struct xrt_space_relation *out_relation)
499{
500 auto &t = *container_of(xtmv, TrackerPSMV, base);
501 get_pose(t, name, when_ns, out_relation);
502}
503
/*!
 * Stub destroy for the xrt_tracked_psmv interface.
 *
 * The tracker's real teardown happens through the frame node
 * (t_psmv_node_break_apart / t_psmv_node_destroy).
 */
extern "C" void
t_psmv_fake_destroy(struct xrt_tracked_psmv *xtmv)
{
	auto &t = *container_of(xtmv, TrackerPSMV, base);
	(void)t;
	// Not the real destroy function
}
511
512extern "C" void
513t_psmv_sink_push_frame(struct xrt_frame_sink *xsink, struct xrt_frame *xf)
514{
515 auto &t = *container_of(xsink, TrackerPSMV, sink);
516 frame(t, xf);
517}
518
519extern "C" void
520t_psmv_node_break_apart(struct xrt_frame_node *node)
521{
522 auto &t = *container_of(node, TrackerPSMV, node);
523 break_apart(t);
524}
525
/*!
 * C entry point: final destruction of the tracker.
 *
 * Runs after t_psmv_node_break_apart has stopped the thread, so it is safe
 * to tear down the thread helper and free the object.
 */
extern "C" void
t_psmv_node_destroy(struct xrt_frame_node *node)
{
	auto *t_ptr = container_of(node, TrackerPSMV, node);
	os_thread_helper_destroy(&t_ptr->oth);

	// Tidy variable setup.
	u_var_remove_root(t_ptr);

	delete t_ptr;
}
537
538extern "C" void *
539t_psmv_run(void *ptr)
540{
541 auto &t = *(TrackerPSMV *)ptr;
542 run(t);
543 return NULL;
544}
545
546
547/*
548 *
549 * Exported functions.
550 *
551 */
552
/*!
 * Start the tracker's processing thread (runs t_psmv_run).
 *
 * @return 0 on success, else the os_thread_helper_start() error code.
 */
extern "C" int
t_psmv_start(struct xrt_tracked_psmv *xtmv)
{
	auto &t = *container_of(xtmv, TrackerPSMV, base);
	return os_thread_helper_start(&t.oth, t_psmv_run, &t);
}
559
/*!
 * Create a PS Move tracker.
 *
 * @param xfctx    Frame context the tracker node is added to; controls lifetime.
 * @param rgb      Colour of the tracked ball, copied into the base struct.
 * @param data     Stereo camera calibration used to build rectification maps.
 * @param out_xtmv Returned tracker interface.
 * @param out_sink Returned sink that callers push stereo frames into.
 * @return 0 on success, else the os_thread_helper_init() error code.
 */
extern "C" int
t_psmv_create(struct xrt_frame_context *xfctx,
              struct xrt_colour_rgb_f32 *rgb,
              struct t_stereo_camera_calibration *data,
              struct xrt_tracked_psmv **out_xtmv,
              struct xrt_frame_sink **out_sink)
{
	XRT_TRACE_MARKER();

	U_LOG_D("Creating PSMV tracker.");

	auto &t = *(new TrackerPSMV());
	int ret;

	// Wire up the C interface function pointers.
	t.base.get_tracked_pose = t_psmv_get_tracked_pose;
	t.base.push_imu = t_psmv_push_imu;
	t.base.destroy = t_psmv_fake_destroy;
	t.base.colour = *rgb;
	t.sink.push_frame = t_psmv_sink_push_frame;
	t.node.break_apart = t_psmv_node_break_apart;
	t.node.destroy = t_psmv_node_destroy;
	t.fusion.rot.x = 0.0f;
	t.fusion.rot.y = 0.0f;
	t.fusion.rot.z = 0.0f;
	t.fusion.rot.w = 1.0f;
	t.filter = PSMVFusionInterface::create();

	ret = os_thread_helper_init(&t.oth);
	if (ret != 0) {
		delete (&t);
		return ret;
	}

	// Crude placement hack: give each successively created controller a
	// different seed position (left, right, then stacked in the middle).
	static int hack = 0;
	switch (hack++) {
	case 0:
		t.fusion.pos.x = -0.3f;
		t.fusion.pos.y = 1.3f;
		t.fusion.pos.z = -0.5f;
		break;
	case 1:
		t.fusion.pos.x = 0.3f;
		t.fusion.pos.y = 1.3f;
		t.fusion.pos.z = -0.5f;
		break;
	default:
		t.fusion.pos.x = 0.0f;
		t.fusion.pos.y = 0.8f + hack * 0.1f;
		t.fusion.pos.z = -0.5f;
		break;
	}

	// Build rectification maps and copy calibration into the two views.
	StereoRectificationMaps rectify(data);
	t.view[0].populate_from_calib(data->view[0], rectify.view[0].rectify);
	t.view[1].populate_from_calib(data->view[1], rectify.view[1].rectify);
	t.disparity_to_depth = rectify.disparity_to_depth_mat;
	StereoCameraCalibrationWrapper wrapped(data);
	t.r_cam_rotation = wrapped.camera_rotation_mat;
	t.r_cam_translation = wrapped.camera_translation_mat;
	t.calibrated = true;

	// Configure the blob detector used in do_view().
	// clang-format off
	cv::SimpleBlobDetector::Params blob_params;
	blob_params.filterByArea = false;
	blob_params.filterByConvexity = true;
	blob_params.minConvexity = 0.8;
	blob_params.filterByInertia = false;
	blob_params.filterByColor = true;
	blob_params.blobColor = 255; // 0 or 255 - color comes from binarized image?
	blob_params.minArea = 1;
	blob_params.maxArea = 1000;
	blob_params.maxThreshold = 51; // using a wide threshold span slows things down bigtime
	blob_params.minThreshold = 50;
	blob_params.thresholdStep = 1;
	blob_params.minDistBetweenBlobs = 5;
	blob_params.minRepeatability = 1; // need this to avoid error?
	// clang-format on

	t.sbd = cv::SimpleBlobDetector::create(blob_params);
	xrt_frame_context_add(xfctx, &t.node);

	// Everything is safe, now setup the variable tracking.
	u_var_add_root(&t, "PSMV Tracker", true);
	u_var_add_vec3_f32(&t, &t.tracked_object_position, "last.ball.pos");
	u_var_add_sink_debug(&t, &t.debug.usd, "Debug");

	*out_sink = &t.sink;
	*out_xtmv = &t.base;

	return 0;
}