(** AOH worked example: {i Panthera leo} in the Serengeti ecosystem.

    Demonstrates the full label pipeline from raw observations through
    synthetic simulation to Area of Habitat calculation, integrating data
    from:

    - Camera traps (Serengeti Lion Project grid)
    - GPS collars (Movebank study 1234)
    - GBIF occurrence records
    - iNaturalist citizen science observations
    - IUCN Red List expert range and habitat preferences
    - Lotka-Volterra population simulation (synthetic)
    - TESSERA v3.1 habitat classification

    The provenance graph:
    {v
    AOH polygon
    ├── species_range (alpha-shape, measured-only)
    │   ├── camera trap detections
    │   ├── GPS collar fixes (Movebank)
    │   ├── GBIF occurrences
    │   └── iNaturalist observations
    ├── IUCN expert range (validation)
    └── habitat suitability tiles (TESSERA)
        └── training set
            ├── all measured occurrences
            ├── IUCN habitat preferences
            └── synthetic augmentation (Lotka-Volterra)
    v} *)

open Terradots

(* Short aliases for two Terradots string converters. They are applied to
   every [~event_date] and [~cell] argument in the label literals below, so
   the short names keep those literals compact. *)
let ed = event_date_of_string
let c = cell_of_string

(* ══════════════════════════════════════════════════════════
   1. Activities — the audit trail

   Each activity links a batch of labels to who/what produced them and
   when. The [agent] field points to Fairground notebook URIs where
   applicable.
══════════════════════════════════════════════════════════ *)

(* Every record below fills the same four fields: [activity_id] (the key the
   labels' [~activity] arguments refer to), [agent], [date] and an optional
   free-text [description]. *)

(** Field work: the 2024 dry-season camera trap survey. *)
let act_field_survey =
  { activity_id = "act-field-2024";
    agent = "orcid:0000-0002-1234-5678";
    date = "2024-06-15T08:00:00Z";
    description =
      Some "Serengeti Lion Project 2024 dry-season \ camera trap survey" }

(** Import of the Movebank GPS collar fixes. *)
let act_movebank_import =
  { activity_id = "act-movebank-import";
    agent = "fairground:notebook/movebank-ingest:v2";
    date = "2024-07-01T12:00:00Z";
    description =
      Some "Bulk import of GPS collar data from \ Movebank study 1234, individuals leo-007 \ and leo-012" }

(** Import of the GBIF occurrence records. *)
let act_gbif_import =
  { activity_id = "act-gbif-import";
    agent = "fairground:notebook/gbif-ingest:v3";
    date = "2024-07-02T10:00:00Z";
    description =
      Some "GBIF Panthera leo occurrences, East Africa, \ 2020-2024" }

(** Import of the iNaturalist observations. *)
let act_inat_import =
  { activity_id = "act-inat-import";
    agent = "fairground:notebook/inat-ingest:v1";
    date = "2024-07-02T14:00:00Z";
    description =
      Some "iNaturalist research-grade P. leo observations, \ Serengeti-Mara ecosystem" }

(** Import of the IUCN Red List range polygon and habitat codes. *)
let act_iucn_import =
  { activity_id = "act-iucn-import";
    agent = "fairground:notebook/iucn-ingest:v1";
    date = "2024-07-03T09:00:00Z";
    description =
      Some "IUCN Red List Panthera leo assessment: expert \ range polygon and habitat preference codes" }

(** The Lotka-Volterra run that produced the [sim-*] labels. *)
let act_simulation =
  { activity_id = "act-sim-lv-001";
    agent = "fairground:notebook/lotka-volterra-serengeti:v4@cell-7";
    date = "2024-07-10T16:00:00Z";
    description =
      Some "Lotka-Volterra predator-prey simulation, \ lion-zebra-wildebeest, Serengeti parameterisation, \ 100-year projection, seed=42" }

(** Assembly of the SDM training set.

    The description literal is kept on one physical line: previously it was
    broken in the middle of "P. leo", which put a raw newline into the
    runtime string. *)
let act_training_set =
  { activity_id = "act-training-2024";
    agent = "fairground:notebook/sdm-training:v2";
    date = "2024-07-15T10:00:00Z";
    description =
      Some "Assemble training set for P. leo SDM: \ balanced spatial sample with synthetic \ augmentation from Lotka-Volterra run" }

(** Habitat suitability classification of the TESSERA tiles. *)
let act_habitat =
  { activity_id = "act-habitat-2024";
    agent = "fairground:notebook/habitat-classify:v3";
    date = "2024-07-16T09:00:00Z";
    description =
      Some "Habitat suitability classification from \ TESSERA v3.1 land-cover embeddings, \ thresholded against IUCN habitat codes" }

(** Alpha-shape species range computation. *)
let act_range =
  { activity_id = "act-range-2024";
    agent = "fairground:notebook/species-range:v2";
    date = "2024-07-16T11:00:00Z";
    description =
      Some "Alpha-shape species range from all verified \ occurrences (measured-only, no synthetic)" }

(** Final Area of Habitat computation. *)
let act_aoh =
  { activity_id = "act-aoh-2024";
    agent = "fairground:notebook/aoh-iucn:v3";
    date = "2024-07-16T14:00:00Z";
    description =
      Some "IUCN Area of Habitat: species range intersected \ with suitable habitat tiles" }

(* ══════════════════════════════════════════════════════════
   2. Camera trap observations — Serengeti Lion Project

   Fixed sensors in the Serengeti NP grid. Each trigger produces a
   Point at the trap's surveyed coordinates.

   Hilbert cells b7a–b7f cover the Serengeti at level 12.
══════════════════════════════════════════════════════════ *)

(** Node 17, first trigger: three lions, resting. *)
let trap_01 =
  make_point
    ~cell:(c "b7a")
    ~id:"ct-001"
    ~x:34.82
    ~y:(-2.33)
    ~observer:"urn:sensor:camera-trap:serengeti-node-17"
    ~class_dist:[ "Panthera leo", 1.0 ]
    ~accuracy_m:5.0
    ~confidence:0.97
    ~event_date:(ed "2024-06-12T05:42:00Z")
    ~activity:"act-field-2024"
    ~properties:
      [ "image_uri", "s3://slp/ct17/IMG_4821.jpg";
        "individual_count", "3";
        "behaviour", "resting" ]
    ()

(** Node 17, second trigger two days later: a single lion, walking. *)
let trap_02 =
  make_point
    ~cell:(c "b7a")
    ~id:"ct-002"
    ~x:34.83
    ~y:(-2.32)
    ~observer:"urn:sensor:camera-trap:serengeti-node-17"
    ~class_dist:[ "Panthera leo", 1.0 ]
    ~accuracy_m:5.0
    ~confidence:0.92
    ~event_date:(ed "2024-06-14T19:15:00Z")
    ~activity:"act-field-2024"
    ~properties:
      [ "image_uri", "s3://slp/ct17/IMG_4903.jpg";
        "individual_count", "1";
        "behaviour", "walking" ]
    ()

(** Node 42: two lions; no behaviour property recorded. *)
let trap_03 =
  make_point
    ~cell:(c "b7c")
    ~id:"ct-003"
    ~x:35.01
    ~y:(-2.15)
    ~observer:"urn:sensor:camera-trap:serengeti-node-42"
    ~class_dist:[ "Panthera leo", 1.0 ]
    ~accuracy_m:5.0
    ~confidence:0.88
    ~event_date:(ed "2024-06-18T03:22:00Z")
    ~activity:"act-field-2024"
    ~properties:
      [ "image_uri", "s3://slp/ct42/IMG_1207.jpg";
        "individual_count", "2" ]
    ()

(** Non-detection: trap triggered by motion but no lion present.
    These matter for occupancy models — absence data is data.
    No [~class_dist], [~accuracy_m] or [~confidence] is supplied here. *)
let trap_04 =
  make_point
    ~cell:(c "b7d")
    ~id:"ct-004"
    ~x:35.22
    ~y:(-2.45)
    ~observer:"urn:sensor:camera-trap:serengeti-node-55"
    ~event_date:(ed "2024-06-20T22:10:00Z")
    ~activity:"act-field-2024"
    ~properties:
      [ "image_uri", "s3://slp/ct55/IMG_0891.jpg";
        "trigger", "motion";
        "species_detected", "none" ]
    ()

(* ══════════════════════════════════════════════════════════
   3. GPS collar tracks — Movebank study 1234

   Imported via the Movebank registry. Each fix is a Point with the
   collar as observer and the Movebank event URI as the registry
   record ([via]).
   ══════════════════════════════════════════════════════════ *)

(** Individual leo-007: three fixes showing movement NE.
*)
let gps_01 =
  make_imported
    ~cell:(c "b7a")
    ~id:"gps-001"
    ~geometry:(Point { x = 34.81; y = -2.34 })
    ~via:"movebank:study/1234/individual/leo-007/event/98001"
    ~observer:"urn:sensor:gps:vectronic-vertex-plus-007"
    ~license:"CC-BY-NC-4.0"
    ~accuracy_m:3.5
    ~class_dist:[ "Panthera leo", 1.0 ]
    ~event_date:(ed "2024-06-10T06:00:00Z")
    ~activity:"act-movebank-import"
    ~properties:
      [ "individual_id", "leo-007";
        "fix_type", "3D";
        "hdop", "0.9" ]
    ()

(** leo-007, second fix, six hours after [gps_01]. *)
let gps_02 =
  make_imported
    ~cell:(c "b7a")
    ~id:"gps-002"
    ~geometry:(Point { x = 34.84; y = -2.31 })
    ~via:"movebank:study/1234/individual/leo-007/event/98002"
    ~observer:"urn:sensor:gps:vectronic-vertex-plus-007"
    ~license:"CC-BY-NC-4.0"
    ~accuracy_m:4.2
    ~class_dist:[ "Panthera leo", 1.0 ]
    ~event_date:(ed "2024-06-10T12:00:00Z")
    ~activity:"act-movebank-import"
    ~properties:
      [ "individual_id", "leo-007";
        "fix_type", "3D";
        "hdop", "1.1" ]
    ()

(** leo-007, third fix, the following morning; now in cell b7b. *)
let gps_03 =
  make_imported
    ~cell:(c "b7b")
    ~id:"gps-003"
    ~geometry:(Point { x = 34.91; y = -2.28 })
    ~via:"movebank:study/1234/individual/leo-007/event/98003"
    ~observer:"urn:sensor:gps:vectronic-vertex-plus-007"
    ~license:"CC-BY-NC-4.0"
    ~accuracy_m:5.1
    ~class_dist:[ "Panthera leo", 1.0 ]
    ~event_date:(ed "2024-06-11T06:00:00Z")
    ~activity:"act-movebank-import"
    ~properties:
      [ "individual_id", "leo-007";
        "fix_type", "3D";
        "hdop", "1.4" ]
    ()

(** Individual leo-012: separate pride, further east. *)
let gps_04 =
  make_imported
    ~cell:(c "b7c")
    ~id:"gps-004"
    ~geometry:(Point { x = 35.05; y = -2.10 })
    ~via:"movebank:study/1234/individual/leo-012/event/98501"
    ~observer:"urn:sensor:gps:vectronic-vertex-plus-012"
    ~license:"CC-BY-NC-4.0"
    ~accuracy_m:3.0
    ~class_dist:[ "Panthera leo", 1.0 ]
    ~event_date:(ed "2024-06-12T06:00:00Z")
    ~activity:"act-movebank-import"
    ~properties:[ "individual_id", "leo-012" ]
    ()

(* ══════════════════════════════════════════════════════════
   4. GBIF occurrence records

   Museum specimens and field surveys aggregated through GBIF.
Note the varying accuracy — the 2021 record has 500 m
   uncertainty (flagged for review in annotations).
   ══════════════════════════════════════════════════════════ *)

(** Day-precision GBIF record with 100 m positional accuracy. *)
let gbif_01 =
  make_imported
    ~cell:(c "b7a")
    ~id:"gbif-001"
    ~geometry:(Point { x = 34.85; y = -2.35 })
    ~via:"gbif:4023589127"
    ~license:"CC-BY-4.0"
    ~accuracy_m:100.0
    ~class_dist:[ "Panthera leo", 1.0 ]
    ~event_date:(ed "2022-08-14")
    ~activity:"act-gbif-import"
    ~properties:
      [ "gbif_dataset", "serengeti-biodiversity-survey";
        "basis_of_record", "HUMAN_OBSERVATION";
        "recorded_by", "Tanzania Wildlife Research Institute" ]
    ()

(** Year-only GBIF record with 500 m positional accuracy; this is the
    record that annotation [ann-002] flags for review. *)
let gbif_02 =
  make_imported
    ~cell:(c "b7e")
    ~id:"gbif-002"
    ~geometry:(Point { x = 35.40; y = -2.50 })
    ~via:"gbif:4023589999"
    ~license:"CC-BY-4.0"
    ~accuracy_m:500.0
    ~class_dist:[ "Panthera leo", 1.0 ]
    ~event_date:(ed "2021")
    ~activity:"act-gbif-import"
    ~properties:
      [ "gbif_dataset", "ngorongoro-mammal-survey";
        "basis_of_record", "HUMAN_OBSERVATION" ]
    ()

(* ══════════════════════════════════════════════════════════
   5. iNaturalist citizen science

   Research-grade observations from the iNaturalist platform.
   The observer is a user URI; the record is the observation URI.
   ══════════════════════════════════════════════════════════ *)

(** Single research-grade observation with five identifications. *)
let inat_01 =
  make_imported
    ~cell:(c "b7b")
    ~id:"inat-001"
    ~geometry:(Point { x = 34.95; y = -2.20 })
    ~via:"inaturalist:observation/182345678"
    ~observer:"inaturalist:user/safari_dave"
    ~license:"CC-BY-NC-4.0"
    ~accuracy_m:50.0
    ~class_dist:[ "Panthera leo", 1.0 ]
    ~confidence:0.95
    ~event_date:(ed "2023-07-22T16:30:00Z")
    ~activity:"act-inat-import"
    ~properties:
      [ "quality_grade", "research";
        "num_identifications", "5" ]
    ()

(* ══════════════════════════════════════════════════════════
   6. IUCN Red List — expert range and habitat preferences

   The IUCN assessment provides two things:
   (a) An expert-drawn range polygon for the species.
   (b) Habitat preference codes (IUCN Habitats Classification
       Scheme) with suitability ratings.
The range polygon validates the data-driven range; the
   habitat codes drive the suitability classification.
   ══════════════════════════════════════════════════════════ *)

(** Expert-drawn range polygon (simplified to bounding extent).
    The ring is closed: its last vertex repeats the first. *)
let iucn_range =
  make_imported
    ~cell:(c "b70")
    ~id:"iucn-range-001"
    ~geometry:
      (Polygon
         [ { x = 34.0; y = -3.0 };
           { x = 36.0; y = -3.0 };
           { x = 36.0; y = -1.0 };
           { x = 34.0; y = -1.0 };
           { x = 34.0; y = -3.0 } ])
    ~via:"iucn:redlist:22/Panthera-leo:range:2024.1"
    ~license:"CC-BY-NC-4.0"
    ~class_dist:[ "Panthera leo", 1.0 ]
    ~event_date:(ed "2024")
    ~activity:"act-iucn-import"
    ~properties:
      [ "iucn_status", "VU";
        "iucn_criteria", "A2abcd";
        "population_trend", "decreasing";
        "range_type", "extant:resident";
        "habitat_codes", "1.5;1.6;2;3;14.1" ]
    ()

(** Habitat preference: savanna (IUCN code 2) — major habitat. *)
let iucn_hab_savanna =
  make_imported
    ~cell:(c "b70")
    ~id:"iucn-hab-001"
    ~geometry:(Point { x = 35.0; y = -2.0 })
    ~via:"iucn:redlist:22/Panthera-leo:habitat:2"
    ~license:"CC-BY-NC-4.0"
    ~class_dist:[ "habitat-preference:savanna", 1.0 ]
    ~confidence:0.95
    ~activity:"act-iucn-import"
    ~properties:
      [ "iucn_habitat_code", "2";
        "suitability", "Suitable";
        "major_importance", "Yes" ]
    ()

(** Habitat preference: shrubland (IUCN code 3) — minor habitat. *)
let iucn_hab_shrubland =
  make_imported
    ~cell:(c "b70")
    ~id:"iucn-hab-002"
    ~geometry:(Point { x = 35.0; y = -2.0 })
    ~via:"iucn:redlist:22/Panthera-leo:habitat:3"
    ~license:"CC-BY-NC-4.0"
    ~class_dist:[ "habitat-preference:shrubland", 1.0 ]
    ~confidence:0.70
    ~activity:"act-iucn-import"
    ~properties:
      [ "iucn_habitat_code", "3";
        "suitability", "Suitable";
        "major_importance", "No" ]
    ()

(* ══════════════════════════════════════════════════════════
   7. Synthetic simulation — Lotka-Volterra population dynamics

   Agent-based Lotka-Volterra model producing simulated lion
   positions in under-sampled areas (Ngorongoro corridor).
These augment the SDM training set but are NEVER included in
   the measured species range.

   The [Simulated] origin keeps them type-level distinct from
   real observations. Properties carry the scenario parameters
   for reproducibility.
   ══════════════════════════════════════════════════════════ *)

(** Baseline scenario, time step 150. *)
let sim_01 =
  make_simulated
    ~cell:(c "b7d")
    ~id:"sim-001"
    ~geometry:(Point { x = 35.20; y = -2.50 })
    ~model:"fairground:notebook/lotka-volterra-serengeti:v4"
    ~run_id:"lv-run-42"
    ~class_dist:[ "Panthera leo", 1.0 ]
    ~event_date:(ed "2024-06-15T00:00:00Z")
    ~confidence:0.60
    ~activity:"act-sim-lv-001"
    ~properties:
      [ "scenario", "baseline-2024";
        "time_step", "150";
        "prey_density_km2", "45.2";
        "seed", "42" ]
    ()

(** Baseline scenario, time step 151. *)
let sim_02 =
  make_simulated
    ~cell:(c "b7d")
    ~id:"sim-002"
    ~geometry:(Point { x = 35.18; y = -2.48 })
    ~model:"fairground:notebook/lotka-volterra-serengeti:v4"
    ~run_id:"lv-run-42"
    ~class_dist:[ "Panthera leo", 1.0 ]
    ~event_date:(ed "2024-06-15T06:00:00Z")
    ~confidence:0.60
    ~activity:"act-sim-lv-001"
    ~properties:
      [ "scenario", "baseline-2024";
        "time_step", "151";
        "prey_density_km2", "44.8";
        "seed", "42" ]
    ()

(** Drought scenario — prey density drops, lion shifts south. *)
let sim_03 =
  make_simulated
    ~cell:(c "b7e")
    ~id:"sim-003"
    ~geometry:(Point { x = 35.45; y = -2.55 })
    ~model:"fairground:notebook/lotka-volterra-serengeti:v4"
    ~run_id:"lv-run-42"
    ~class_dist:[ "Panthera leo", 1.0 ]
    ~event_date:(ed "2024-06-16T00:00:00Z")
    ~confidence:0.55
    ~activity:"act-sim-lv-001"
    ~properties:
      [ "scenario", "drought-2024";
        "time_step", "152";
        "prey_density_km2", "28.1";
        "seed", "42" ]
    ()

(* ══════════════════════════════════════════════════════════
   8. Derivation: training set assembly

   The training set is itself a derived label — it records exactly
   which labels (measured + synthetic) were selected for model
   training, and the synthetic fraction. This is the provenance
   anchor for the SDM:
   you can always ask "which observations trained this model?"
══════════════════════════════════════════════════════════ *)

(** Ids of the occurrence labels that carry a lion detection. The
    non-detection [ct-004] is not listed. *)
let all_measured_ids =
  [ "ct-001"; "ct-002"; "ct-003";
    "gps-001"; "gps-002"; "gps-003"; "gps-004";
    "gbif-001"; "gbif-002";
    "inat-001" ]

(** Ids of the Lotka-Volterra labels. *)
let all_synthetic_ids = [ "sim-001"; "sim-002"; "sim-003" ]

(** The SDM training set: a derived label whose [sources] are the measured
    ids followed by the synthetic ids.

    The [n_measured], [n_synthetic] and [synthetic_fraction] properties are
    all computed from the two id lists, so they cannot drift apart when a
    list is edited. With the current lists the fraction is 3/13, which
    renders as ["0.23"]. *)
let training_set =
  let n_measured = List.length all_measured_ids in
  let n_synthetic = List.length all_synthetic_ids in
  let synthetic_fraction =
    float_of_int n_synthetic /. float_of_int (n_measured + n_synthetic)
  in
  make_derived
    ~cell:(c "b70")
    ~id:"ts-001"
    ~geometry:
      (Polygon
         [ { x = 34.0; y = -3.0 };
           { x = 36.0; y = -3.0 };
           { x = 36.0; y = -1.0 };
           { x = 34.0; y = -1.0 };
           { x = 34.0; y = -3.0 } ])
    ~sources:(all_measured_ids @ all_synthetic_ids)
    ~method_:"training-set:balanced-spatial-sample"
    ~class_dist:[ "training-set:Panthera-leo:sdm-2024", 1.0 ]
    ~activity:"act-training-2024"
    ~properties:
      [ "n_measured", string_of_int n_measured;
        "n_synthetic", string_of_int n_synthetic;
        "synthetic_fraction", Printf.sprintf "%.2f" synthetic_fraction;
        "spatial_extent", "34.0,-3.0,36.0,-1.0";
        "temporal_window", "2021/2024";
        "tessera_model", "tessera:v3.1:east-africa" ]
    ()

(* ══════════════════════════════════════════════════════════
   9. Derivation: habitat suitability from TESSERA

   Each TESSERA tile is classified as suitable or unsuitable based
   on its land-cover embedding and the IUCN habitat preference codes.
   The [sources] link back to the IUCN habitat labels.
   ══════════════════════════════════════════════════════════ *)

(* NOTE(review): the provenance diagram in the file header draws the habitat
   tiles as descending from the training set, but these source ids point only
   at the two IUCN habitat-preference labels, so no [sim-*] label is reachable
   from a tile through [sources] — confirm which of the two is intended. *)
let hab_sources = [ "iucn-hab-001"; "iucn-hab-002" ]

(** Core Serengeti savanna — highly suitable. *)
let hab_01 =
  make_derived
    ~cell:(c "b7a")
    ~id:"hab-001"
    ~geometry:
      (Polygon
         [ { x = 34.80; y = -2.40 };
           { x = 34.90; y = -2.40 };
           { x = 34.90; y = -2.30 };
           { x = 34.80; y = -2.30 };
           { x = 34.80; y = -2.40 } ])
    ~sources:hab_sources
    ~method_:"habitat-classify:tessera-v3.1:threshold-0.6"
    ~confidence:0.91
    ~class_dist:[ "savanna", 0.78; "shrubland", 0.13; "other", 0.09 ]
    ~activity:"act-habitat-2024"
    ~properties:
      [ "tessera_tile", "b7a:034.80:-002.40";
        "dominant_landcover", "savanna" ]
    ()

(** Savanna-shrubland mosaic — moderate suitability.
*)
let hab_02 =
  make_derived
    ~cell:(c "b7d")
    ~id:"hab-002"
    ~geometry:
      (Polygon
         [ { x = 35.10; y = -2.60 };
           { x = 35.20; y = -2.60 };
           { x = 35.20; y = -2.50 };
           { x = 35.10; y = -2.50 };
           { x = 35.10; y = -2.60 } ])
    ~sources:hab_sources
    ~method_:"habitat-classify:tessera-v3.1:threshold-0.6"
    ~confidence:0.68
    ~class_dist:[ "savanna", 0.45; "shrubland", 0.30; "cropland", 0.25 ]
    ~activity:"act-habitat-2024"
    ~properties:
      [ "tessera_tile", "b7d:035.10:-002.60";
        "dominant_landcover", "savanna-shrubland-mosaic" ]
    ()

(** Agricultural land — unsuitable, excluded from AOH. *)
let hab_03 =
  make_derived
    ~cell:(c "b7f")
    ~id:"hab-003"
    ~geometry:
      (Polygon
         [ { x = 35.80; y = -1.20 };
           { x = 35.90; y = -1.20 };
           { x = 35.90; y = -1.10 };
           { x = 35.80; y = -1.10 };
           { x = 35.80; y = -1.20 } ])
    ~sources:hab_sources
    ~method_:"habitat-classify:tessera-v3.1:threshold-0.6"
    ~confidence:0.12
    ~class_dist:[ "cropland", 0.72; "settlement", 0.18; "savanna", 0.10 ]
    ~activity:"act-habitat-2024"
    ~properties:
      [ "tessera_tile", "b7f:035.80:-001.20";
        "dominant_landcover", "cropland" ]
    ()

(* ══════════════════════════════════════════════════════════
   10. Derivation: species range from occurrences

   Alpha-shape computed from measured-only data. Synthetic labels
   are explicitly excluded — the range must reflect where lions
   have actually been observed.

   The [is_simulated] accessor is used by the range pipeline to
   filter out synthetic augmentation.
══════════════════════════════════════════════════════════ *)

(** Data-driven species range. Its [sources] are exactly
    [all_measured_ids], so no [sim-*] id is listed, and [n_occurrences]
    is the length of that same list. *)
let species_range =
  make_derived
    ~cell:(c "b70")
    ~id:"range-001"
    ~geometry:
      (Polygon
         [ { x = 34.75; y = -2.60 };
           { x = 35.50; y = -2.60 };
           { x = 35.50; y = -2.00 };
           { x = 35.10; y = -1.90 };
           { x = 34.75; y = -2.10 };
           { x = 34.75; y = -2.60 } ])
    ~sources:all_measured_ids
    ~method_:"alpha-shape:alpha-0.005"
    ~class_dist:[ "range:Panthera leo", 1.0 ]
    ~activity:"act-range-2024"
    ~properties:
      [ "range_km2", "4850";
        "n_occurrences", string_of_int (List.length all_measured_ids);
        "excludes_synthetic", "true" ]
    ()

(* ══════════════════════════════════════════════════════════
   11. Derivation: Area of Habitat

   The final AOH is the intersection of:
   - the data-driven species range (measured-only)
   - the TESSERA habitat suitability tiles (suitable only)
   - validated against the IUCN expert range

   The result is a Multi polygon — disconnected habitat patches
   within the range. Properties carry the IUCN assessment metadata
   and the key metrics.

   When TESSERA is retrained (v3.1 → v3.2), the habitat tiles
   change, so AOH recomputes. The new AOH label gets a new
   activity; both versions coexist for comparison.
══════════════════════════════════════════════════════════ *)

(** The Area of Habitat label: a two-patch [Multi] geometry derived from
    the species range, the IUCN expert range and the two suitable habitat
    tiles. [hab-003] is deliberately absent from [sources]. *)
let aoh =
  make_derived
    ~cell:(c "b70")
    ~id:"aoh-001"
    ~geometry:
      (Multi
         [ (* Patch 1: core Serengeti savanna *)
           Polygon
             [ { x = 34.80; y = -2.40 };
               { x = 35.20; y = -2.40 };
               { x = 35.20; y = -2.10 };
               { x = 34.80; y = -2.10 };
               { x = 34.80; y = -2.40 } ];
           (* Patch 2: southern extension into Ngorongoro *)
           Polygon
             [ { x = 35.10; y = -2.60 };
               { x = 35.40; y = -2.60 };
               { x = 35.40; y = -2.40 };
               { x = 35.10; y = -2.40 };
               { x = 35.10; y = -2.60 } ] ])
    ~sources:
      [ "range-001";       (* data-driven species range *)
        "iucn-range-001";  (* IUCN expert range — validation *)
        "hab-001";         (* suitable habitat tile *)
        "hab-002"          (* suitable habitat tile *)
        (* hab-003 excluded: unsuitable cropland *) ]
    ~method_:"aoh:iucn-2022:range-intersect-habitat"
    ~class_dist:[ "aoh:Panthera leo", 1.0 ]
    ~activity:"act-aoh-2024"
    ~properties:
      [ (* AOH metrics *)
        "aoh_km2", "3420";
        "range_km2", "4850";
        "habitat_proportion", "0.705";
        "unsuitable_excluded_km2", "1430";
        "dominant_exclusion", "cropland";
        (* IUCN assessment context *)
        "iucn_status", "VU";
        "iucn_criteria", "A2abcd";
        "population_trend", "decreasing";
        (* Model provenance *)
        "tessera_model", "tessera:v3.1:east-africa";
        "synthetic_in_sdm_training", "true";
        "synthetic_fraction_in_training", "0.23" ]
    ()

(* ══════════════════════════════════════════════════════════
   12.
Document assembly
   ══════════════════════════════════════════════════════════ *)

(** The assembled document: CRS and Hilbert level, the ten activities, all
    23 labels, four free-text annotations and five label groups. *)
let doc =
  { crs = wgs84;
    level = 12;
    provenance =
      [ act_field_survey;
        act_movebank_import;
        act_gbif_import;
        act_inat_import;
        act_iucn_import;
        act_simulation;
        act_training_set;
        act_habitat;
        act_range;
        act_aoh ];
    labels =
      [ (* Camera traps *)
        trap_01; trap_02; trap_03; trap_04;
        (* GPS collars — Movebank *)
        gps_01; gps_02; gps_03; gps_04;
        (* GBIF *)
        gbif_01; gbif_02;
        (* iNaturalist *)
        inat_01;
        (* IUCN Red List *)
        iucn_range; iucn_hab_savanna; iucn_hab_shrubland;
        (* Synthetic — Lotka-Volterra *)
        sim_01; sim_02; sim_03;
        (* Derivations *)
        training_set;
        hab_01; hab_02; hab_03;
        species_range;
        aoh ];
    annotations =
      [ (* NOTE(review): ct-001 is dated 2024-06-12 and gps-001 2024-06-10
           in their definitions, so "on the same day" does not match the
           data. The text is left unchanged — confirm the intended wording. *)
        { id = "ann-001";
          text = "Camera trap ct-001 and GPS fix gps-001 are 1.4 km \ apart on the same day — likely same pride. Consider \ merge after dry-season survey completes.";
          anchors = [ "ct-001"; "gps-001" ] };
        { id = "ann-002";
          text = "GBIF gbif-002 has 500 m uncertainty and only year-level \ temporal precision. Flag for review before including \ in high-resolution analyses.";
          anchors = [ "gbif-002" ] };
        { id = "ann-003";
          text = "Synthetic labels sim-001..sim-003 augment the under-sampled \ Ngorongoro corridor. Weight reduced to 0.5x in training \ set assembly. Not included in species range computation.";
          anchors = [ "sim-001"; "sim-002"; "sim-003" ] };
        (* This literal is kept on one physical line: it was previously
           broken after "boundary.", which put a raw newline into the
           annotation text. *)
        { id = "ann-004";
          text = "AOH shows 70.5% of range is suitable habitat. Main \ exclusion is cropland encroachment on the eastern boundary. \ Compare with IUCN 2019 assessment (was 78%).";
          anchors = [ "aoh-001" ] } ];
    groups =
      [ { id = "grp-field-2024";
          activity = Some "act-field-2024";
          members = [ "ct-001"; "ct-002"; "ct-003"; "ct-004" ] };
        { id = "grp-leo-007-track";
          activity = Some "act-movebank-import";
          members = [ "gps-001"; "gps-002"; "gps-003" ] };
        { id = "grp-leo-012-track";
          activity = Some "act-movebank-import";
          members = [ "gps-004" ] };
        { id = "grp-synthetic-lv42";
          activity = Some "act-sim-lv-001";
          members = [ "sim-001"; "sim-002"; "sim-003" ] };
        { id = "grp-iucn-habitat-prefs";
          activity = Some "act-iucn-import";
          members = [ "iucn-hab-001"; "iucn-hab-002" ] } ] }

(* ══════════════════════════════════════════════════════════
   13. Queries — demonstrating the provenance graph

   These functions show how a wiki renderer or analysis pipeline
   would traverse the label graph.
   ══════════════════════════════════════════════════════════ *)

(** [find id] is the first label in [doc.labels] whose id equals [id].
    @raise Not_found if no label has that id. *)
let find id = List.find (fun (l : label) -> l.id = id) doc.labels

(** [in_cell c] is every label of [doc] whose [cell] equals [c], in
    document order. *)
let in_cell c = List.filter (fun (l : label) -> l.cell = c) doc.labels

(** All labels whose [origin] is built with the [Measured] constructor;
    every other origin is rejected by the catch-all arm.
    NOTE(review): whether labels built with [make_imported] carry a
    [Measured] origin depends on Terradots and is not visible here. *)
let measured_only () =
  List.filter
    (fun (l : label) ->
      match l.origin with
      | Measured _ -> true
      | _ -> false)
    doc.labels

(** All labels for which [is_simulated] holds. *)
let synthetic_only () = List.filter is_simulated doc.labels

(** [sources_of_label l] resolves each id in [sources_of l] against
    [doc.labels], in order. Ids that match no label in the document are
    silently dropped. *)
let sources_of_label l =
  List.filter_map
    (fun src_id ->
      List.find_opt (fun (cand : label) -> cand.id = src_id) doc.labels)
    (sources_of l)

(** [all_ancestors l] is every label reachable from [l] by following
    [sources] one or more times.

    Each label appears at most once, keyed by id: a label reached along
    several paths (for example a habitat preference shared by two tiles) is
    reported only the first time it is met. [l] itself is never part of the
    result, and the walk terminates even if the [sources] graph contains a
    cycle. On a tree-shaped graph the order is direct sources first, then
    the ancestors of each direct source in turn. *)
let all_ancestors (l : label) =
  let seen = Hashtbl.create 16 in
  (* Pre-mark the root so that a cycle leading back to it is cut off. *)
  Hashtbl.replace seen l.id ();
  let rec expand node =
    (* Direct sources of [node] that have not been reported yet, in order. *)
    let fresh =
      List.fold_left
        (fun acc (src : label) ->
          if Hashtbl.mem seen src.id then acc
          else begin
            Hashtbl.replace seen src.id ();
            src :: acc
          end)
        [] (sources_of_label node)
      |> List.rev
    in
    (* Only newly discovered labels are expanded; anything seen earlier is
       expanded by the call that first discovered it. *)
    fresh @ List.concat_map expand fresh
  in
  expand l

(** How many distinct simulated labels are among the ancestors of [l]. *)
let synthetic_ancestor_count l =
  all_ancestors l |> List.filter is_simulated |> List.length

(** Activity record for a label.
*)
let activity_of (l : label) =
  (* A label without an activity id yields [None]; otherwise the id is looked
     up among [doc.provenance], which is also [None] when nothing matches. *)
  Option.bind l.activity (fun aid ->
      List.find_opt (fun a -> a.activity_id = aid) doc.provenance)

(* ══════════════════════════════════════════════════════════
   14. Main — exercise the provenance queries
   ══════════════════════════════════════════════════════════ *)

(* Prints a summary of the document, the headline AOH properties, the size of
   the AOH provenance graph, the AOH activity and a one-cell spatial query.
   [find] raises [Not_found] if "aoh-001" is missing from the document. *)
let () =
  let count xs = List.length xs in
  let total = count doc.labels in
  let measured = count (measured_only ()) in
  let synthetic = count (synthetic_only ()) in
  (* Reported as "derived", but computed as whatever is neither measured nor
     simulated. *)
  let remainder = total - measured - synthetic in
  print_string "Terradots AOH Example: Panthera leo, Serengeti\n";
  print_string "══════════════════════════════════════════════\n";
  Printf.printf "CRS: %s Hilbert level: %d\n" doc.crs doc.level;
  Printf.printf "Labels: %d total (%d measured, %d synthetic, %d derived)\n"
    total measured synthetic remainder;
  Printf.printf "Activities: %d\n" (count doc.provenance);
  Printf.printf "Annotations: %d\n" (count doc.annotations);
  Printf.printf "Groups: %d\n\n" (count doc.groups);

  (* AOH provenance *)
  let aoh_label = find "aoh-001" in
  Printf.printf "AOH label: %s\n" (label_name aoh_label);
  (* Property lookup with "?" standing in for a missing key. *)
  let prop key =
    match List.assoc_opt key aoh_label.properties with
    | Some value -> value
    | None -> "?"
  in
  Printf.printf " AOH: %s km² / %s km² range = %s suitable\n"
    (prop "aoh_km2") (prop "range_km2") (prop "habitat_proportion");
  Printf.printf " IUCN status: %s (%s), trend: %s\n"
    (prop "iucn_status") (prop "iucn_criteria") (prop "population_trend");
  Printf.printf " TESSERA model: %s\n" (prop "tessera_model");
  Printf.printf " Synthetic in training: %s (fraction: %s)\n\n"
    (prop "synthetic_in_sdm_training")
    (prop "synthetic_fraction_in_training");

  (* Provenance depth *)
  let reachable = all_ancestors aoh_label in
  let simulated_reachable = synthetic_ancestor_count aoh_label in
  print_string "Provenance graph from AOH:\n";
  Printf.printf " Reachable labels: %d\n" (count reachable);
  Printf.printf " Of which synthetic: %d\n" simulated_reachable;

  (* Activity for AOH *)
  begin
    match activity_of aoh_label with
    | None -> ()
    | Some act ->
      Printf.printf " Activity: %s\n" act.activity_id;
      Printf.printf " Agent: %s\n" act.agent;
      Printf.printf " Date: %s\n" act.date
  end;

  (* Spatial query *)
  Printf.printf "\nLabels in cell b7a: %d\n" (count (in_cell (c "b7a")))