A research repository on geolabels; not yet intended for wide use.
1(** AOH worked example: {i Panthera leo} in the Serengeti ecosystem.
2
3 Demonstrates the full label pipeline from raw observations
4 through synthetic simulation to Area of Habitat calculation,
5 integrating data from:
6
7 - Camera traps (Serengeti Lion Project grid)
8 - GPS collars (Movebank study 1234)
9 - GBIF occurrence records
10 - iNaturalist citizen science observations
11 - IUCN Red List expert range and habitat preferences
12 - Lotka-Volterra population simulation (synthetic)
13 - TESSERA v3.1 habitat classification
14
15 The provenance graph:
16 {v
17 AOH polygon
18 ├── species_range (alpha-shape, measured-only)
19 │ ├── camera trap detections
20 │ ├── GPS collar fixes (Movebank)
21 │ ├── GBIF occurrences
22 │ └── iNaturalist observations
23 ├── IUCN expert range (validation)
24 └── habitat suitability tiles (TESSERA)
25 └── training set
26 ├── all measured occurrences
27 ├── IUCN habitat preferences
28 └── synthetic augmentation (Lotka-Volterra)
29 v} *)
30
31open Terradots
32
(* Short alias for [event_date_of_string]; keeps the [~event_date:(ed "...")]
   arguments in the label definitions below compact. *)
let ed = event_date_of_string
(* Short alias for [cell_of_string]; used as [~cell:(c "b7a")] wherever a
   label is assigned to a Hilbert cell. *)
let c = cell_of_string
35
36(* ══════════════════════════════════════════════════════════
37 1. Activities — the audit trail
38
39 Each activity links a batch of labels to who/what produced
40 them and when. The [agent] field points to Fairground
41 notebook URIs where applicable.
42 ══════════════════════════════════════════════════════════ *)
43
(** Activity [act-field-2024]: the 2024 dry-season camera-trap survey,
    attributed to an ORCID agent. Labels cite it by its [activity_id]. *)
let act_field_survey =
  let summary =
    "Serengeti Lion Project 2024 dry-season camera trap survey"
  in
  {
    activity_id = "act-field-2024";
    agent = "orcid:0000-0002-1234-5678";
    date = "2024-06-15T08:00:00Z";
    description = Some summary;
  }
50
(** Activity [act-movebank-import]: bulk ingest of the Movebank GPS-collar
    fixes, run by the [movebank-ingest:v2] notebook. *)
let act_movebank_import =
  let summary =
    "Bulk import of GPS collar data from Movebank study 1234, \
     individuals leo-007 and leo-012"
  in
  {
    activity_id = "act-movebank-import";
    agent = "fairground:notebook/movebank-ingest:v2";
    date = "2024-07-01T12:00:00Z";
    description = Some summary;
  }
58
(** Activity [act-gbif-import]: ingest of GBIF occurrence records, run by
    the [gbif-ingest:v3] notebook. *)
let act_gbif_import =
  let summary = "GBIF Panthera leo occurrences, East Africa, 2020-2024" in
  {
    activity_id = "act-gbif-import";
    agent = "fairground:notebook/gbif-ingest:v3";
    date = "2024-07-02T10:00:00Z";
    description = Some summary;
  }
65
(** Activity [act-inat-import]: ingest of iNaturalist observations, run by
    the [inat-ingest:v1] notebook. *)
let act_inat_import =
  let summary =
    "iNaturalist research-grade P. leo observations, Serengeti-Mara \
     ecosystem"
  in
  {
    activity_id = "act-inat-import";
    agent = "fairground:notebook/inat-ingest:v1";
    date = "2024-07-02T14:00:00Z";
    description = Some summary;
  }
72
(** Activity [act-iucn-import]: ingest of the IUCN Red List assessment
    (range polygon and habitat preference codes), run by the
    [iucn-ingest:v1] notebook. *)
let act_iucn_import =
  let summary =
    "IUCN Red List Panthera leo assessment: expert range polygon and \
     habitat preference codes"
  in
  {
    activity_id = "act-iucn-import";
    agent = "fairground:notebook/iucn-ingest:v1";
    date = "2024-07-03T09:00:00Z";
    description = Some summary;
  }
79
(** Activity [act-sim-lv-001]: the Lotka-Volterra simulation run that the
    synthetic [sim-*] labels cite. The agent URI pins a notebook version
    and cell ([v4@cell-7]). *)
let act_simulation =
  let summary =
    "Lotka-Volterra predator-prey simulation, lion-zebra-wildebeest, \
     Serengeti parameterisation, 100-year projection, seed=42"
  in
  {
    activity_id = "act-sim-lv-001";
    agent = "fairground:notebook/lotka-volterra-serengeti:v4@cell-7";
    date = "2024-07-10T16:00:00Z";
    description = Some summary;
  }
87
(** Activity [act-training-2024]: assembly of the SDM training set, run by
    the [sdm-training:v2] notebook. *)
let act_training_set =
  let summary =
    "Assemble training set for P. leo SDM: balanced spatial sample \
     with synthetic augmentation from Lotka-Volterra run"
  in
  {
    activity_id = "act-training-2024";
    agent = "fairground:notebook/sdm-training:v2";
    date = "2024-07-15T10:00:00Z";
    description = Some summary;
  }
95
(** Activity [act-habitat-2024]: habitat suitability classification, run by
    the [habitat-classify:v3] notebook. *)
let act_habitat =
  let summary =
    "Habitat suitability classification from TESSERA v3.1 land-cover \
     embeddings, thresholded against IUCN habitat codes"
  in
  {
    activity_id = "act-habitat-2024";
    agent = "fairground:notebook/habitat-classify:v3";
    date = "2024-07-16T09:00:00Z";
    description = Some summary;
  }
103
(** Activity [act-range-2024]: alpha-shape species range computation, run by
    the [species-range:v2] notebook. *)
let act_range =
  let summary =
    "Alpha-shape species range from all verified occurrences \
     (measured-only, no synthetic)"
  in
  {
    activity_id = "act-range-2024";
    agent = "fairground:notebook/species-range:v2";
    date = "2024-07-16T11:00:00Z";
    description = Some summary;
  }
110
(** Activity [act-aoh-2024]: the Area of Habitat computation, run by the
    [aoh-iucn:v3] notebook. *)
let act_aoh =
  let summary =
    "IUCN Area of Habitat: species range intersected with suitable \
     habitat tiles"
  in
  {
    activity_id = "act-aoh-2024";
    agent = "fairground:notebook/aoh-iucn:v3";
    date = "2024-07-16T14:00:00Z";
    description = Some summary;
  }
117
118(* ══════════════════════════════════════════════════════════
119 2. Camera trap observations — Serengeti Lion Project
120
121 Fixed sensors in the Serengeti NP grid. Each trigger
122 produces a Point at the trap's surveyed coordinates.
123 Hilbert cells b7a–b7f cover the Serengeti at level 12.
124 ══════════════════════════════════════════════════════════ *)
125
(** Camera-trap detection [ct-001] from sensor node 17 in cell [b7a].
    The image URI, individual count and behaviour travel as string
    properties. *)
let trap_01 =
  let properties =
    [
      ("image_uri", "s3://slp/ct17/IMG_4821.jpg");
      ("individual_count", "3");
      ("behaviour", "resting");
    ]
  in
  make_point ~id:"ct-001" ~cell:(c "b7a")
    ~x:34.82 ~y:(-2.33)
    ~observer:"urn:sensor:camera-trap:serengeti-node-17"
    ~event_date:(ed "2024-06-12T05:42:00Z")
    ~class_dist:[ ("Panthera leo", 1.0) ]
    ~confidence:0.97 ~accuracy_m:5.0
    ~activity:"act-field-2024"
    ~properties ()
139
(** Camera-trap detection [ct-002]: a second trigger attributed to sensor
    node 17 in cell [b7a], two days after [ct-001]. *)
let trap_02 =
  let properties =
    [
      ("image_uri", "s3://slp/ct17/IMG_4903.jpg");
      ("individual_count", "1");
      ("behaviour", "walking");
    ]
  in
  make_point ~id:"ct-002" ~cell:(c "b7a")
    ~x:34.83 ~y:(-2.32)
    ~observer:"urn:sensor:camera-trap:serengeti-node-17"
    ~event_date:(ed "2024-06-14T19:15:00Z")
    ~class_dist:[ ("Panthera leo", 1.0) ]
    ~confidence:0.92 ~accuracy_m:5.0
    ~activity:"act-field-2024"
    ~properties ()
153
(** Camera-trap detection [ct-003] from sensor node 42 in cell [b7c].
    No behaviour property is recorded for this trigger. *)
let trap_03 =
  let properties =
    [
      ("image_uri", "s3://slp/ct42/IMG_1207.jpg");
      ("individual_count", "2");
    ]
  in
  make_point ~id:"ct-003" ~cell:(c "b7c")
    ~x:35.01 ~y:(-2.15)
    ~observer:"urn:sensor:camera-trap:serengeti-node-42"
    ~event_date:(ed "2024-06-18T03:22:00Z")
    ~class_dist:[ ("Panthera leo", 1.0) ]
    ~confidence:0.88 ~accuracy_m:5.0
    ~activity:"act-field-2024"
    ~properties ()
166
(** Non-detection [ct-004]: the trap at node 55 fired on motion but no lion
    was present. Kept because absence data matters for occupancy models.
    No [class_dist], [accuracy_m] or [confidence] is supplied. *)
let trap_04 =
  let properties =
    [
      ("image_uri", "s3://slp/ct55/IMG_0891.jpg");
      ("trigger", "motion");
      ("species_detected", "none");
    ]
  in
  make_point ~id:"ct-004" ~cell:(c "b7d")
    ~x:35.22 ~y:(-2.45)
    ~observer:"urn:sensor:camera-trap:serengeti-node-55"
    ~event_date:(ed "2024-06-20T22:10:00Z")
    ~activity:"act-field-2024"
    ~properties ()
181
182(* ══════════════════════════════════════════════════════════
183 3. GPS collar tracks — Movebank study 1234
184
185 Imported via the Movebank registry. Each fix is a Point
186 with the collar as observer and the Movebank event URI as
187 the registry record ([via]).
188 ══════════════════════════════════════════════════════════ *)
189
(** Individual leo-007, fix 1 of 3 ([gps-001]). Together the three fixes
    show movement to the north-east. The collar is the observer and the
    Movebank event URI is the registry record ([via]). *)
let gps_01 =
  let properties =
    [ ("individual_id", "leo-007"); ("fix_type", "3D"); ("hdop", "0.9") ]
  in
  make_imported ~id:"gps-001" ~cell:(c "b7a")
    ~geometry:(Point { x = 34.81; y = -2.34 })
    ~observer:"urn:sensor:gps:vectronic-vertex-plus-007"
    ~via:"movebank:study/1234/individual/leo-007/event/98001"
    ~license:"CC-BY-NC-4.0"
    ~event_date:(ed "2024-06-10T06:00:00Z")
    ~class_dist:[ ("Panthera leo", 1.0) ]
    ~accuracy_m:3.5
    ~activity:"act-movebank-import"
    ~properties ()
206
(** Individual leo-007, fix 2 of 3 ([gps-002]), six hours after
    [gps-001] and still in cell [b7a]. *)
let gps_02 =
  let properties =
    [ ("individual_id", "leo-007"); ("fix_type", "3D"); ("hdop", "1.1") ]
  in
  make_imported ~id:"gps-002" ~cell:(c "b7a")
    ~geometry:(Point { x = 34.84; y = -2.31 })
    ~observer:"urn:sensor:gps:vectronic-vertex-plus-007"
    ~via:"movebank:study/1234/individual/leo-007/event/98002"
    ~license:"CC-BY-NC-4.0"
    ~event_date:(ed "2024-06-10T12:00:00Z")
    ~class_dist:[ ("Panthera leo", 1.0) ]
    ~accuracy_m:4.2
    ~activity:"act-movebank-import"
    ~properties ()
222
(** Individual leo-007, fix 3 of 3 ([gps-003]), recorded the following
    morning in the neighbouring cell [b7b]. *)
let gps_03 =
  let properties =
    [ ("individual_id", "leo-007"); ("fix_type", "3D"); ("hdop", "1.4") ]
  in
  make_imported ~id:"gps-003" ~cell:(c "b7b")
    ~geometry:(Point { x = 34.91; y = -2.28 })
    ~observer:"urn:sensor:gps:vectronic-vertex-plus-007"
    ~via:"movebank:study/1234/individual/leo-007/event/98003"
    ~license:"CC-BY-NC-4.0"
    ~event_date:(ed "2024-06-11T06:00:00Z")
    ~class_dist:[ ("Panthera leo", 1.0) ]
    ~accuracy_m:5.1
    ~activity:"act-movebank-import"
    ~properties ()
238
(** Individual leo-012 ([gps-004]): a separate pride, further east in cell
    [b7c]. Only the individual ID is carried as a property. *)
let gps_04 =
  let properties = [ ("individual_id", "leo-012") ] in
  make_imported ~id:"gps-004" ~cell:(c "b7c")
    ~geometry:(Point { x = 35.05; y = -2.10 })
    ~observer:"urn:sensor:gps:vectronic-vertex-plus-012"
    ~via:"movebank:study/1234/individual/leo-012/event/98501"
    ~license:"CC-BY-NC-4.0"
    ~event_date:(ed "2024-06-12T06:00:00Z")
    ~class_dist:[ ("Panthera leo", 1.0) ]
    ~accuracy_m:3.0
    ~activity:"act-movebank-import"
    ~properties ()
253
254(* ══════════════════════════════════════════════════════════
255 4. GBIF occurrence records
256
257 Museum specimens and field surveys aggregated through GBIF.
258 Note the varying accuracy — the 2021 record has 500 m
259 uncertainty (flagged for review in annotations).
260 ══════════════════════════════════════════════════════════ *)
261
(** GBIF occurrence [gbif-001]: a day-precision 2022 record with 100 m
    positional accuracy. No [observer] is supplied; the recorder is kept
    as the [recorded_by] property. *)
let gbif_01 =
  let properties =
    [
      ("gbif_dataset", "serengeti-biodiversity-survey");
      ("basis_of_record", "HUMAN_OBSERVATION");
      ("recorded_by", "Tanzania Wildlife Research Institute");
    ]
  in
  make_imported ~id:"gbif-001" ~cell:(c "b7a")
    ~geometry:(Point { x = 34.85; y = -2.35 })
    ~via:"gbif:4023589127"
    ~license:"CC-BY-4.0"
    ~event_date:(ed "2022-08-14")
    ~class_dist:[ ("Panthera leo", 1.0) ]
    ~accuracy_m:100.0
    ~activity:"act-gbif-import"
    ~properties ()
277
(** GBIF occurrence [gbif-002]: the coarse record, with 500 m positional
    accuracy and a year-only event date ("2021"). *)
let gbif_02 =
  let properties =
    [
      ("gbif_dataset", "ngorongoro-mammal-survey");
      ("basis_of_record", "HUMAN_OBSERVATION");
    ]
  in
  make_imported ~id:"gbif-002" ~cell:(c "b7e")
    ~geometry:(Point { x = 35.40; y = -2.50 })
    ~via:"gbif:4023589999"
    ~license:"CC-BY-4.0"
    ~event_date:(ed "2021")
    ~class_dist:[ ("Panthera leo", 1.0) ]
    ~accuracy_m:500.0
    ~activity:"act-gbif-import"
    ~properties ()
292
293(* ══════════════════════════════════════════════════════════
294 5. iNaturalist citizen science
295
296 Research-grade observations from the iNaturalist platform.
297 The observer is a user URI; the record is the observation URI.
298 ══════════════════════════════════════════════════════════ *)
299
(** iNaturalist observation [inat-001]: the observer is a user URI and
    [via] is the observation URI. Quality grade and identification count
    are kept as properties. *)
let inat_01 =
  let properties =
    [ ("quality_grade", "research"); ("num_identifications", "5") ]
  in
  make_imported ~id:"inat-001" ~cell:(c "b7b")
    ~geometry:(Point { x = 34.95; y = -2.20 })
    ~observer:"inaturalist:user/safari_dave"
    ~via:"inaturalist:observation/182345678"
    ~license:"CC-BY-NC-4.0"
    ~event_date:(ed "2023-07-22T16:30:00Z")
    ~class_dist:[ ("Panthera leo", 1.0) ]
    ~confidence:0.95 ~accuracy_m:50.0
    ~activity:"act-inat-import"
    ~properties ()
316
317(* ══════════════════════════════════════════════════════════
318 6. IUCN Red List — expert range and habitat preferences
319
320 The IUCN assessment provides two things:
321 (a) An expert-drawn range polygon for the species.
322 (b) Habitat preference codes (IUCN Habitats Classification
323 Scheme) with suitability ratings.
324
325 The range polygon validates the data-driven range; the
326 habitat codes drive the suitability classification.
327 ══════════════════════════════════════════════════════════ *)
328
(** IUCN expert-drawn range [iucn-range-001], simplified here to its
    bounding extent (34..36 E, 3..1 S) as a closed ring. The assessment
    status, criteria, trend and habitat codes ride along as properties. *)
let iucn_range =
  let properties =
    [
      ("iucn_status", "VU");
      ("iucn_criteria", "A2abcd");
      ("population_trend", "decreasing");
      ("range_type", "extant:resident");
      ("habitat_codes", "1.5;1.6;2;3;14.1");
    ]
  in
  make_imported ~id:"iucn-range-001" ~cell:(c "b70")
    ~geometry:
      (Polygon
         [
           { x = 34.0; y = -3.0 };
           { x = 36.0; y = -3.0 };
           { x = 36.0; y = -1.0 };
           { x = 34.0; y = -1.0 };
           { x = 34.0; y = -3.0 };
         ])
    ~via:"iucn:redlist:22/Panthera-leo:range:2024.1"
    ~license:"CC-BY-NC-4.0"
    ~event_date:(ed "2024")
    ~class_dist:[ ("Panthera leo", 1.0) ]
    ~activity:"act-iucn-import"
    ~properties ()
352
(** IUCN habitat preference [iucn-hab-001]: savanna (IUCN code 2), marked
    as a habitat of major importance. No event date is supplied. *)
let iucn_hab_savanna =
  let properties =
    [
      ("iucn_habitat_code", "2");
      ("suitability", "Suitable");
      ("major_importance", "Yes");
    ]
  in
  make_imported ~id:"iucn-hab-001" ~cell:(c "b70")
    ~geometry:(Point { x = 35.0; y = -2.0 })
    ~via:"iucn:redlist:22/Panthera-leo:habitat:2"
    ~license:"CC-BY-NC-4.0"
    ~class_dist:[ ("habitat-preference:savanna", 1.0) ]
    ~confidence:0.95
    ~activity:"act-iucn-import"
    ~properties ()
368
(** IUCN habitat preference [iucn-hab-002]: shrubland (IUCN code 3), a
    suitable habitat that is not of major importance. No event date is
    supplied. *)
let iucn_hab_shrubland =
  let properties =
    [
      ("iucn_habitat_code", "3");
      ("suitability", "Suitable");
      ("major_importance", "No");
    ]
  in
  make_imported ~id:"iucn-hab-002" ~cell:(c "b70")
    ~geometry:(Point { x = 35.0; y = -2.0 })
    ~via:"iucn:redlist:22/Panthera-leo:habitat:3"
    ~license:"CC-BY-NC-4.0"
    ~class_dist:[ ("habitat-preference:shrubland", 1.0) ]
    ~confidence:0.70
    ~activity:"act-iucn-import"
    ~properties ()
384
385(* ══════════════════════════════════════════════════════════
386 7. Synthetic simulation — Lotka-Volterra population dynamics
387
388 Agent-based Lotka-Volterra model producing simulated lion
389 positions in under-sampled areas (Ngorongoro corridor).
390 These augment the SDM training set but are NEVER included
391 in the measured species range.
392
393 The [Simulated] origin keeps them type-level distinct from
394 real observations. Properties carry the scenario parameters
395 for reproducibility.
396 ══════════════════════════════════════════════════════════ *)
397
(** Simulated lion position [sim-001] from run [lv-run-42], baseline
    scenario, time step 150. The scenario parameters are kept as
    properties for reproducibility. *)
let sim_01 =
  let properties =
    [
      ("scenario", "baseline-2024");
      ("time_step", "150");
      ("prey_density_km2", "45.2");
      ("seed", "42");
    ]
  in
  make_simulated ~id:"sim-001" ~cell:(c "b7d")
    ~geometry:(Point { x = 35.20; y = -2.50 })
    ~model:"fairground:notebook/lotka-volterra-serengeti:v4"
    ~run_id:"lv-run-42"
    ~event_date:(ed "2024-06-15T00:00:00Z")
    ~class_dist:[ ("Panthera leo", 1.0) ]
    ~confidence:0.60
    ~activity:"act-sim-lv-001"
    ~properties ()
414
(** Simulated lion position [sim-002] from run [lv-run-42], baseline
    scenario, time step 151 (six hours after [sim-001]). *)
let sim_02 =
  let properties =
    [
      ("scenario", "baseline-2024");
      ("time_step", "151");
      ("prey_density_km2", "44.8");
      ("seed", "42");
    ]
  in
  make_simulated ~id:"sim-002" ~cell:(c "b7d")
    ~geometry:(Point { x = 35.18; y = -2.48 })
    ~model:"fairground:notebook/lotka-volterra-serengeti:v4"
    ~run_id:"lv-run-42"
    ~event_date:(ed "2024-06-15T06:00:00Z")
    ~class_dist:[ ("Panthera leo", 1.0) ]
    ~confidence:0.60
    ~activity:"act-sim-lv-001"
    ~properties ()
431
(** Simulated lion position [sim-003], drought scenario: prey density
    drops to 28.1 per km2 and the simulated position lies south-east of
    the two baseline points, in cell [b7e]. *)
let sim_03 =
  let properties =
    [
      ("scenario", "drought-2024");
      ("time_step", "152");
      ("prey_density_km2", "28.1");
      ("seed", "42");
    ]
  in
  make_simulated ~id:"sim-003" ~cell:(c "b7e")
    ~geometry:(Point { x = 35.45; y = -2.55 })
    ~model:"fairground:notebook/lotka-volterra-serengeti:v4"
    ~run_id:"lv-run-42"
    ~event_date:(ed "2024-06-16T00:00:00Z")
    ~class_dist:[ ("Panthera leo", 1.0) ]
    ~confidence:0.55
    ~activity:"act-sim-lv-001"
    ~properties ()
449
450(* ══════════════════════════════════════════════════════════
451 8. Derivation: training set assembly
452
453 The training set is itself a derived label — it records
454 exactly which labels (measured + synthetic) were selected
455 for model training, and the synthetic fraction.
456
457 This is the provenance anchor for the SDM: you can always
458 ask "which observations trained this model?"
459 ══════════════════════════════════════════════════════════ *)
460
(** IDs of every measured occurrence used downstream, grouped by source.
    The camera-trap non-detection [ct-004] and the IUCN labels are
    deliberately absent. *)
let all_measured_ids =
  let camera_traps = [ "ct-001"; "ct-002"; "ct-003" ] in
  let gps_fixes = [ "gps-001"; "gps-002"; "gps-003"; "gps-004" ] in
  let gbif_records = [ "gbif-001"; "gbif-002" ] in
  let inat_records = [ "inat-001" ] in
  List.concat [ camera_traps; gps_fixes; gbif_records; inat_records ]
466
(** IDs of the simulated (Lotka-Volterra) labels that augment the training
    set. *)
let all_synthetic_ids = "sim-001" :: "sim-002" :: "sim-003" :: []
469
(** Derived label [ts-001]: the SDM training set. Its [sources] are the
    measured IDs followed by the synthetic IDs, so the label records
    exactly which observations were selected for training. The geometry is
    the bounding extent 34..36 E, 3..1 S.

    The counts and the synthetic fraction in [properties] are all computed
    from [all_measured_ids] and [all_synthetic_ids], so they cannot drift
    apart when either list is edited. *)
let training_set =
  let n_measured = List.length all_measured_ids in
  let n_synthetic = List.length all_synthetic_ids in
  (* Share of synthetic labels among all sources; 3 of 13 renders as
     "0.23". An empty training set is reported as 0 rather than dividing
     by zero. *)
  let synthetic_fraction =
    match n_measured + n_synthetic with
    | 0 -> 0.0
    | total -> float_of_int n_synthetic /. float_of_int total
  in
  make_derived
    ~cell:(c "b70") ~id:"ts-001"
    ~geometry:(Polygon [
      { x = 34.0; y = -3.0 };
      { x = 36.0; y = -3.0 };
      { x = 36.0; y = -1.0 };
      { x = 34.0; y = -1.0 };
      { x = 34.0; y = -3.0 };
    ])
    ~sources:(all_measured_ids @ all_synthetic_ids)
    ~method_:"training-set:balanced-spatial-sample"
    ~class_dist:[("training-set:Panthera-leo:sdm-2024", 1.0)]
    ~activity:"act-training-2024"
    ~properties:[
      ("n_measured", string_of_int n_measured);
      ("n_synthetic", string_of_int n_synthetic);
      ("synthetic_fraction", Printf.sprintf "%.2f" synthetic_fraction);
      ("spatial_extent", "34.0,-3.0,36.0,-1.0");
      ("temporal_window", "2021/2024");
      ("tessera_model", "tessera:v3.1:east-africa")]
    ()
492
493(* ══════════════════════════════════════════════════════════
494 9. Derivation: habitat suitability from TESSERA
495
496 Each TESSERA tile is classified as suitable or unsuitable
497 based on its land-cover embedding and the IUCN habitat
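   preference codes. The [sources] link back to the IUCN
   habitat labels only: the training set [ts-001] is not
   listed as a source of the tiles, so the training-set
   branch drawn in the header graph is not reachable from
   the AOH label by following [sources].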
500 ══════════════════════════════════════════════════════════ *)
501
(** Source IDs shared by every habitat tile: the two IUCN habitat
    preference labels. *)
let hab_sources = "iucn-hab-001" :: "iucn-hab-002" :: []
503
(** Habitat tile [hab-001]: core Serengeti savanna, highly suitable
    (confidence 0.91, 78% savanna in the class distribution). *)
let hab_01 =
  let properties =
    [
      ("tessera_tile", "b7a:034.80:-002.40");
      ("dominant_landcover", "savanna");
    ]
  in
  let class_dist =
    [ ("savanna", 0.78); ("shrubland", 0.13); ("other", 0.09) ]
  in
  make_derived ~id:"hab-001" ~cell:(c "b7a")
    ~geometry:
      (Polygon
         [
           { x = 34.80; y = -2.40 };
           { x = 34.90; y = -2.40 };
           { x = 34.90; y = -2.30 };
           { x = 34.80; y = -2.30 };
           { x = 34.80; y = -2.40 };
         ])
    ~sources:hab_sources
    ~method_:"habitat-classify:tessera-v3.1:threshold-0.6"
    ~class_dist ~confidence:0.91
    ~activity:"act-habitat-2024"
    ~properties ()
524
(** Habitat tile [hab-002]: savanna-shrubland mosaic with a quarter
    cropland, moderate suitability (confidence 0.68). *)
let hab_02 =
  let properties =
    [
      ("tessera_tile", "b7d:035.10:-002.60");
      ("dominant_landcover", "savanna-shrubland-mosaic");
    ]
  in
  let class_dist =
    [ ("savanna", 0.45); ("shrubland", 0.30); ("cropland", 0.25) ]
  in
  make_derived ~id:"hab-002" ~cell:(c "b7d")
    ~geometry:
      (Polygon
         [
           { x = 35.10; y = -2.60 };
           { x = 35.20; y = -2.60 };
           { x = 35.20; y = -2.50 };
           { x = 35.10; y = -2.50 };
           { x = 35.10; y = -2.60 };
         ])
    ~sources:hab_sources
    ~method_:"habitat-classify:tessera-v3.1:threshold-0.6"
    ~class_dist ~confidence:0.68
    ~activity:"act-habitat-2024"
    ~properties ()
545
(** Habitat tile [hab-003]: agricultural land (72% cropland), unsuitable
    (confidence 0.12) and therefore left out of the AOH label's sources. *)
let hab_03 =
  let properties =
    [
      ("tessera_tile", "b7f:035.80:-001.20");
      ("dominant_landcover", "cropland");
    ]
  in
  let class_dist =
    [ ("cropland", 0.72); ("settlement", 0.18); ("savanna", 0.10) ]
  in
  make_derived ~id:"hab-003" ~cell:(c "b7f")
    ~geometry:
      (Polygon
         [
           { x = 35.80; y = -1.20 };
           { x = 35.90; y = -1.20 };
           { x = 35.90; y = -1.10 };
           { x = 35.80; y = -1.10 };
           { x = 35.80; y = -1.20 };
         ])
    ~sources:hab_sources
    ~method_:"habitat-classify:tessera-v3.1:threshold-0.6"
    ~class_dist ~confidence:0.12
    ~activity:"act-habitat-2024"
    ~properties ()
566
567(* ══════════════════════════════════════════════════════════
568 10. Derivation: species range from occurrences
569
570 Alpha-shape computed from measured-only data. Synthetic
571 labels are explicitly excluded — the range must reflect
572 where lions have actually been observed.
573
574 The [is_simulated] accessor is used by the range pipeline
575 to filter out synthetic augmentation.
576 ══════════════════════════════════════════════════════════ *)
577
(** Derived label [range-001]: the alpha-shape species range. Its
    [sources] are the measured IDs only, with no [sim-*] label, so the
    range reflects where lions were actually observed. *)
let species_range =
  let occurrence_ids = all_measured_ids (* no sim-* labels *) in
  let properties =
    [
      ("range_km2", "4850");
      ("n_occurrences", string_of_int (List.length occurrence_ids));
      ("excludes_synthetic", "true");
    ]
  in
  make_derived ~id:"range-001" ~cell:(c "b70")
    ~geometry:
      (Polygon
         [
           { x = 34.75; y = -2.60 };
           { x = 35.50; y = -2.60 };
           { x = 35.50; y = -2.00 };
           { x = 35.10; y = -1.90 };
           { x = 34.75; y = -2.10 };
           { x = 34.75; y = -2.60 };
         ])
    ~sources:occurrence_ids
    ~method_:"alpha-shape:alpha-0.005"
    ~class_dist:[ ("range:Panthera leo", 1.0) ]
    ~activity:"act-range-2024"
    ~properties ()
598
599(* ══════════════════════════════════════════════════════════
600 11. Derivation: Area of Habitat
601
602 The final AOH is the intersection of:
603 - the data-driven species range (measured-only)
604 - the TESSERA habitat suitability tiles (suitable only)
605 - validated against the IUCN expert range
606
607 The result is a Multi polygon — disconnected habitat
608 patches within the range. Properties carry the IUCN
609 assessment metadata and the key metrics.
610
611 When TESSERA is retrained (v3.1 → v3.2), the habitat
612 tiles change, so AOH recomputes. The new AOH label gets
613 a new activity; both versions coexist for comparison.
614 ══════════════════════════════════════════════════════════ *)
615
(** Derived label [aoh-001]: the Area of Habitat, a [Multi] geometry of two
    polygon patches. Its sources are the data-driven range, the IUCN expert
    range (for validation) and the two suitable habitat tiles. [hab-003] is
    left out because it is unsuitable cropland. The properties carry the
    AOH metrics, the IUCN assessment context and the model provenance. *)
let aoh =
  let sources =
    [
      (* data-driven species range *)
      "range-001";
      (* IUCN expert range, used for validation *)
      "iucn-range-001";
      (* suitable habitat tiles; hab-003 excluded: unsuitable cropland *)
      "hab-001";
      "hab-002";
    ]
  in
  let properties =
    [
      (* AOH metrics *)
      ("aoh_km2", "3420");
      ("range_km2", "4850");
      ("habitat_proportion", "0.705");
      ("unsuitable_excluded_km2", "1430");
      ("dominant_exclusion", "cropland");
      (* IUCN assessment context *)
      ("iucn_status", "VU");
      ("iucn_criteria", "A2abcd");
      ("population_trend", "decreasing");
      (* Model provenance *)
      ("tessera_model", "tessera:v3.1:east-africa");
      ("synthetic_in_sdm_training", "true");
      ("synthetic_fraction_in_training", "0.23");
    ]
  in
  make_derived ~id:"aoh-001" ~cell:(c "b70")
    ~geometry:
      (Multi
         [
           (* Patch 1: core Serengeti savanna *)
           Polygon
             [
               { x = 34.80; y = -2.40 };
               { x = 35.20; y = -2.40 };
               { x = 35.20; y = -2.10 };
               { x = 34.80; y = -2.10 };
               { x = 34.80; y = -2.40 };
             ];
           (* Patch 2: southern extension into Ngorongoro *)
           Polygon
             [
               { x = 35.10; y = -2.60 };
               { x = 35.40; y = -2.60 };
               { x = 35.40; y = -2.40 };
               { x = 35.10; y = -2.40 };
               { x = 35.10; y = -2.60 };
             ];
         ])
    ~sources
    ~method_:"aoh:iucn-2022:range-intersect-habitat"
    ~class_dist:[ ("aoh:Panthera leo", 1.0) ]
    ~activity:"act-aoh-2024"
    ~properties ()
662
663(* ══════════════════════════════════════════════════════════
664 12. Document assembly
665 ══════════════════════════════════════════════════════════ *)
666
(** The assembled document: CRS, Hilbert level, every activity record,
    every label defined above, free-text annotations anchored to label IDs,
    and named groups of label IDs.

    Annotation [ann-001] states the separation between [ct-001] and
    [gps-001] as recorded on the labels themselves: the camera-trap event
    is dated 2024-06-12 and the GPS fix 2024-06-10, and the two points
    differ by 0.01 degrees in both latitude and longitude (about 1.6 km
    this close to the equator). *)
let doc =
  { crs = wgs84;
    level = 12;
    (* Audit trail: one record per activity cited by the labels. *)
    provenance = [
      act_field_survey;
      act_movebank_import;
      act_gbif_import;
      act_inat_import;
      act_iucn_import;
      act_simulation;
      act_training_set;
      act_habitat;
      act_range;
      act_aoh;
    ];
    labels = [
      (* Camera traps *)
      trap_01; trap_02; trap_03; trap_04;
      (* GPS collars — Movebank *)
      gps_01; gps_02; gps_03; gps_04;
      (* GBIF *)
      gbif_01; gbif_02;
      (* iNaturalist *)
      inat_01;
      (* IUCN Red List *)
      iucn_range; iucn_hab_savanna; iucn_hab_shrubland;
      (* Synthetic — Lotka-Volterra *)
      sim_01; sim_02; sim_03;
      (* Derivations *)
      training_set;
      hab_01; hab_02; hab_03;
      species_range;
      aoh;
    ];
    annotations = [
      { id = "ann-001";
        text = "Camera trap ct-001 and GPS fix gps-001 are about 1.6 km \
                apart and two days apart (2024-06-12 vs 2024-06-10) — likely \
                same pride. Consider merge after dry-season survey completes.";
        anchors = ["ct-001"; "gps-001"] };
      { id = "ann-002";
        text = "GBIF gbif-002 has 500 m uncertainty and only year-level \
                temporal precision. Flag for review before including \
                in high-resolution analyses.";
        anchors = ["gbif-002"] };
      { id = "ann-003";
        text = "Synthetic labels sim-001..sim-003 augment the under-sampled \
                Ngorongoro corridor. Weight reduced to 0.5x in training \
                set assembly. Not included in species range computation.";
        anchors = ["sim-001"; "sim-002"; "sim-003"] };
      { id = "ann-004";
        text = "AOH shows 70.5% of range is suitable habitat. Main \
                exclusion is cropland encroachment on the eastern boundary. \
                Compare with IUCN 2019 assessment (was 78%).";
        anchors = ["aoh-001"] };
    ];
    groups = [
      { id = "grp-field-2024";
        activity = Some "act-field-2024";
        members = ["ct-001"; "ct-002"; "ct-003"; "ct-004"] };
      { id = "grp-leo-007-track";
        activity = Some "act-movebank-import";
        members = ["gps-001"; "gps-002"; "gps-003"] };
      { id = "grp-leo-012-track";
        activity = Some "act-movebank-import";
        members = ["gps-004"] };
      { id = "grp-synthetic-lv42";
        activity = Some "act-sim-lv-001";
        members = ["sim-001"; "sim-002"; "sim-003"] };
      { id = "grp-iucn-habitat-prefs";
        activity = Some "act-iucn-import";
        members = ["iucn-hab-001"; "iucn-hab-002"] };
    ];
  }
741
742(* ══════════════════════════════════════════════════════════
743 13. Queries — demonstrating the provenance graph
744
745 These functions show how a wiki renderer or analysis
746 pipeline would traverse the label graph.
747 ══════════════════════════════════════════════════════════ *)
748
(** [find id] is the first label in [doc.labels] whose ID equals [id].
    @raise Not_found if no label carries that ID. *)
let find id =
  let has_id (candidate : label) = candidate.id = id in
  List.find has_id doc.labels
752
(** [in_cell cell] is every label of [doc.labels] assigned to the Hilbert
    cell [cell], in document order. *)
let in_cell cell =
  let lives_here (l : label) = l.cell = cell in
  List.filter lives_here doc.labels
756
(** Every label of [doc.labels] whose origin is [Measured _], in document
    order. Any other origin is rejected. *)
let measured_only () =
  let is_measured (l : label) =
    match l.origin with
    | Measured _ -> true
    | _ -> false
  in
  List.filter is_measured doc.labels
762
(** Every label of [doc.labels] accepted by [is_simulated], in document
    order. *)
let synthetic_only () = doc.labels |> List.filter is_simulated
766
(** [sources_of_label l] resolves the source IDs of [l] (as returned by
    [sources_of]) to the labels of [doc.labels] carrying those IDs, keeping
    the order of the IDs.

    An ID with no matching label in [doc.labels] is skipped silently, so
    the result can be shorter than [sources_of l]. *)
let sources_of_label l =
  (* First label in the document with this ID, if any. *)
  let lookup src_id =
    List.find_opt (fun (candidate : label) -> candidate.id = src_id) doc.labels
  in
  List.filter_map lookup (sources_of l)
775
(** [all_ancestors l] is the transitive closure of [sources]: every label
    reachable from [l] by repeatedly following {!sources_of_label}.

    Each reachable label appears exactly once, in depth-first order of
    first discovery, even when several derivations share a source (for
    example two habitat tiles citing the same IUCN labels). Labels are
    identified by their [id]. The traversal keeps a visited set, so it
    terminates on a cyclic [sources] graph. [l] itself is never part of
    the result. *)
let all_ancestors (l : label) =
  let seen = Hashtbl.create 16 in
  (* Mark the starting label so a cycle cannot report it as its own
     ancestor. *)
  Hashtbl.add seen l.id ();
  let rec visit acc (node : label) =
    List.fold_left
      (fun acc (src : label) ->
        if Hashtbl.mem seen src.id then acc
        else begin
          Hashtbl.add seen src.id ();
          visit (src :: acc) src
        end)
      acc
      (sources_of_label node)
  in
  (* [visit] accumulates in reverse discovery order. *)
  List.rev (visit [] l)
781
(** [synthetic_ancestor_count l] is the number of entries of
    [all_ancestors l] accepted by [is_simulated], i.e. how many synthetic
    labels fed into this derivation. *)
let synthetic_ancestor_count l =
  List.fold_left
    (fun n ancestor -> if is_simulated ancestor then n + 1 else n)
    0
    (all_ancestors l)
787
(** [activity_of l] is the activity record in [doc.provenance] whose
    [activity_id] matches the activity cited by [l]. The result is [None]
    when [l] cites no activity or when no record carries that ID. *)
let activity_of (l : label) =
  Option.bind l.activity (fun wanted ->
      List.find_opt (fun a -> a.activity_id = wanted) doc.provenance)
794
795(* ══════════════════════════════════════════════════════════
796 14. Main — exercise the provenance queries
797 ══════════════════════════════════════════════════════════ *)
798
(* Entry point: prints a summary of the document, then walks the provenance
   of the AOH label (metrics, reachable labels, producing activity) and
   finishes with a per-cell spatial query. *)
let () =
  (* Headline counts. Whatever is neither measured nor simulated is
     reported as derived. *)
  let total = List.length doc.labels in
  let measured = List.length (measured_only ()) in
  let synthetic = List.length (synthetic_only ()) in
  let derived = total - measured - synthetic in
  Printf.printf "Terradots AOH Example: Panthera leo, Serengeti\n";
  Printf.printf "══════════════════════════════════════════════\n";
  Printf.printf "CRS: %s Hilbert level: %d\n" doc.crs doc.level;
  Printf.printf
    "Labels: %d total (%d measured, %d synthetic, %d derived)\n"
    total measured synthetic derived;
  Printf.printf "Activities: %d\n" (List.length doc.provenance);
  Printf.printf "Annotations: %d\n" (List.length doc.annotations);
  Printf.printf "Groups: %d\n\n" (List.length doc.groups);

  (* AOH provenance: [find] raises Not_found if the label is missing. *)
  let aoh_label = find "aoh-001" in
  Printf.printf "AOH label: %s\n" (label_name aoh_label);
  (* Property lookup on the AOH label; a missing key prints as "?". *)
  let prop key =
    match List.assoc_opt key aoh_label.properties with
    | Some value -> value
    | None -> "?"
  in
  Printf.printf " AOH: %s km² / %s km² range = %s suitable\n"
    (prop "aoh_km2") (prop "range_km2") (prop "habitat_proportion");
  Printf.printf " IUCN status: %s (%s), trend: %s\n"
    (prop "iucn_status") (prop "iucn_criteria") (prop "population_trend");
  Printf.printf " TESSERA model: %s\n" (prop "tessera_model");
  Printf.printf " Synthetic in training: %s (fraction: %s)\n\n"
    (prop "synthetic_in_sdm_training")
    (prop "synthetic_fraction_in_training");

  (* Provenance depth *)
  let reachable = all_ancestors aoh_label in
  let synthetic_reachable = synthetic_ancestor_count aoh_label in
  Printf.printf "Provenance graph from AOH:\n";
  Printf.printf " Reachable labels: %d\n" (List.length reachable);
  Printf.printf " Of which synthetic: %d\n" synthetic_reachable;

  (* Activity that produced the AOH label, if one is recorded. *)
  (match activity_of aoh_label with
   | None -> ()
   | Some { activity_id; agent; date; _ } ->
     Printf.printf " Activity: %s\n" activity_id;
     Printf.printf " Agent: %s\n" agent;
     Printf.printf " Date: %s\n" date);

  (* Spatial query *)
  let in_b7a = in_cell (c "b7a") in
  Printf.printf "\nLabels in cell b7a: %d\n" (List.length in_b7a)