Feature fit frame exterior (#28)

dugalh · web-flow · commit d89ac644cf7c · 2025-05-03T19:48:30.000+02:00
* separate cv to oty ext param conversion into _cv_ext_to_oty_ext

* add to dos

* add fit_frame()

* offset coordinates in _cv_ext_to_oty_ext() to fix float32 precision issues

* add tests for fit_frame()

* add fit_frame_exterior()

* add tests for fit_frame_exterior()

* fix changing loop var in loop

* increment version

* add note about fisheye behaviour

* better splitting of gcps over images

* make fit_frame() & its tests private to avoid a windows OpenCV 4.11 specific issue for now
diff --git a/.github/workflows/install-test-conda-forge.yml b/.github/workflows/install-test-conda-forge.yml
@@ -30,7 +30,7 @@ jobs:
       - name: Install package
         run: |
           conda info
-          conda install orthority>=0.5.1
+          conda install orthority>=0.6.0
           conda list
 
       - name: Install OpenCV Linux dependencies
diff --git a/orthority/camera.py b/orthority/camera.py
@@ -209,6 +209,7 @@ def pixel_boundary(self, num_pts: int = None) -> np.ndarray:
             Boundary pixel (j=column, i=row) coordinates as a 2-by-N array, with (j, i) along the
             first dimension.
         """
+        # TODO: this does not always return the correct number of pts e.g. num_pts=7 / 11
 
         def rect_boundary(im_size: np.ndarray, num_pts: int) -> np.ndarray:
             """Return a rectangular pixel coordinate boundary of ``num_pts`` ~evenly spaced points
@@ -858,6 +859,8 @@ def pixel_to_world_z(self, ji: np.ndarray, z: float | np.ndarray) -> np.ndarray:
         """
         # TODO: consider only returning (x, y).  the z dimension is redundant, and it is used this
         #  way in most (all?) places.
+        # TODO: i have noticed that the results with e.g. z=0 sometimes have z close to but not
+        #  equal 0.  is there a way of re-organising this so that doesn't happen?
         self._test_init()
         self._validate_pixel_coords(ji)
         self._validate_z(z, ji)
diff --git a/orthority/fit.py b/orthority/fit.py
@@ -13,22 +13,38 @@
 # You should have received a copy of the GNU Affero General Public License along with Orthority.
 # If not, see <https://www.gnu.org/licenses/>.
 """Camera model fitting and refinement."""
+
 from __future__ import annotations
 
 import logging
+import warnings
+from collections.abc import Sequence
 from copy import deepcopy
-from typing import Sequence
+from math import ceil
+from typing import Any
 
+import cv2
 import numpy as np
+from rasterio.crs import CRS
 from rasterio.rpc import RPC
+from rasterio.warp import transform
 
-from orthority.camera import RpcCamera
-from orthority.enums import RpcRefine
+from orthority import param_io
+from orthority.camera import FrameCamera, RpcCamera
+from orthority.enums import CameraType, RpcRefine
+from orthority.errors import OrthorityWarning
 
 logger = logging.getLogger(__name__)
 
 _default_rpc_refine_method = RpcRefine.shift
 
+_frame_dist_params = {k: v[3:] for k, v in param_io._opt_frame_schema.items()}
+"""Distortion coefficient names in OpenCV ordering for each frame camera model."""
+_frame_num_params = {k: len(v) + 6 for k, v in _frame_dist_params.items()}
+"""Number of distortion coefficient and exterior parameters for each frame camera model (excludes 
+focal length(s) and principal point).
+"""
+
 
 def refine_rpc(
     rpc: RPC | dict, gcps: Sequence[dict], method: RpcRefine = _default_rpc_refine_method
@@ -85,7 +101,7 @@ def _norm_ji(rpc: dict, ji: np.ndarray) -> np.ndarray:
         refine_tform[:, -1] = off.mean(axis=1)
     else:
         for axis in range(2):
-            ji_rpc_ = np.vstack((ji_rpc[axis], np.ones((ji_rpc.shape[1]))))
+            ji_rpc_ = np.vstack((ji_rpc[axis], np.ones(ji_rpc.shape[1])))
             (m, c), res, rank, s = np.linalg.lstsq(ji_rpc_.T, ji_gcp[axis], rcond=None)
             refine_tform[axis, axis] = m
             refine_tform[axis, 2] = c
@@ -112,3 +128,233 @@ def _norm_ji(rpc: dict, ji: np.ndarray) -> np.ndarray:
         refined_rpc[num_key] += np.array(refined_rpc[den_key]) * refine_tform[axis, 2]
         refined_rpc[num_key] = refined_rpc[num_key].tolist()
     return refined_rpc
+
+
+def _gcps_to_cv_coords(
+    gcp_dict: dict[str, Sequence[dict]], crs: str | CRS | None = None
+) -> tuple[list[np.ndarray], list[np.ndarray], np.ndarray]:
+    """Split a GCP dictionary into per-image pixel and world coordinate arrays for OpenCV.
+
+    Returns a list of float32 pixel coordinate arrays, a list of float32 world coordinate arrays
+    offset by the mean world position, and that mean (reference) position, in that order.
+    """
+    if isinstance(crs, str):
+        crs = CRS.from_string(crs)
+    # build one pixel and one world coordinate array per image
+    pixel_coords, world_coords = [], []
+    for im_gcps in gcp_dict.values():
+        pixel = np.array([gcp['ji'] for gcp in im_gcps])
+        world = np.array([gcp['xyz'] for gcp in im_gcps])
+        if crs:
+            world = np.array(transform(CRS.from_epsg(4979), crs, *world.T)).T
+        pixel_coords.append(pixel.astype('float32'))
+        world_coords.append(world)
+
+    # subtract the mean world position before the float32 cast to limit precision loss
+    ref_xyz = np.vstack(world_coords).mean(axis=0)
+    return pixel_coords, [(w - ref_xyz).astype('float32') for w in world_coords], ref_xyz
+
+
+def _fit_frame(
+    cam_type: CameraType,
+    im_size: tuple[int, int],
+    gcp_dict: dict[str, Sequence[dict]],
+    crs: str | CRS | None = None,
+) -> tuple[dict[str, dict[str, Any]], dict[str, dict[str, Any]]]:
+    """
+    Fit a frame camera to GCPs.
+
+    :param cam_type:
+        Camera type to fit.
+    :param im_size:
+        Image (width, height) in pixels.
+    :param gcp_dict:
+        GCP dictionary e.g. as returned by :func:`~orthority.param_io.read_im_gcps` or
+        :func:`~orthority.param_io.read_oty_gcps`.
+    :param crs:
+        CRS of the camera world coordinate system as an EPSG, proj4 or WKT string,
+        or :class:`~rasterio.crs.CRS` object.  If set to ``None`` (the default), GCPs are assumed
+        to be in the world coordinate CRS, and are not transformed.  Otherwise, GCPs are
+        transformed from geographic WGS84 coordinates to this CRS if it is supplied.
+
+    :return:
+        Interior parameter and exterior parameter dictionaries.
+    """
+    # TODO: is it better to use cv2.initCameraMatrix2D and cv2.solvePnp(flags=cv2.SOLVEPNP_SQPNP)
+    #  rather than cv2.calibrateCamera when num pts <=4
+
+    # check there are at least 4 GCPs per image
+    min_gcps = min(len(gcps) for gcps in gcp_dict.values())
+    if min_gcps < 4:
+        raise ValueError('At least four GCPs are needed per image.')
+
+    # check the total number of GCPs is enough to fit cam_type
+    ttl_gcps = sum(len(gcps) for gcps in gcp_dict.values())
+    req_gcps = max(4, ceil((1 + _frame_num_params[cam_type]) / 2))
+    if ttl_gcps < req_gcps:
+        raise ValueError(
+            f"A total of at least {req_gcps} GCPs are required to fit the '{cam_type!r}' model."
+        )
+
+    # convert GCPs to OpenCV compatible lists of arrays
+    jis, xyzs, ref_xyz = _gcps_to_cv_coords(gcp_dict, crs=crs)
+
+    # check GCPs are co-planar (replicates OpenCV's test).  z values are concatenated, not
+    # stacked, as images can have different numbers of GCPs
+    zs = np.concatenate([xyz[:, 2] for xyz in xyzs])
+    if abs(zs.mean()) > 1e-5 or zs.std() > 1e-5:
+        raise ValueError('GCPs should be co-planar to fit interior parameters.')
+
+    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_COUNT, 1000, 1e-15)
+    warn_str = (
+        "A total of at least {0} GCPs are required to estimate all '{1!r}' parameters, but there "
+        "are {2}.  The initial intrinsic matrix will not be globally optimised."
+    )
+
+    # setup calibration flags & params based on cam_type and number of GCPs
+    if cam_type is not CameraType.fisheye:
+        calib_func = cv2.calibrateCamera
+        # force square pixels always
+        flags = cv2.CALIB_FIX_ASPECT_RATIO
+
+        # fix initial intrinsic matrix if there are not enough GCPs to estimate all params (+3 is
+        # for 1 focal length and 2 principal points)
+        req_gcps = ceil((_frame_num_params[cam_type] + 3 + 1) / 2)
+        if ttl_gcps < req_gcps:
+            warnings.warn(
+                warn_str.format(req_gcps, cam_type, ttl_gcps),
+                category=OrthorityWarning,
+                stacklevel=2,
+            )
+            flags |= cv2.CALIB_FIX_PRINCIPAL_POINT | cv2.CALIB_FIX_FOCAL_LENGTH
+
+        if cam_type is CameraType.pinhole:
+            # fix distortion at zero
+            flags |= (
+                cv2.CALIB_ZERO_TANGENT_DIST | cv2.CALIB_FIX_K1 | cv2.CALIB_FIX_K2 | cv2.CALIB_FIX_K3
+            )
+        elif cam_type is CameraType.opencv:
+            # enable full OpenCV model
+            flags |= cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_THIN_PRISM_MODEL | cv2.CALIB_TILTED_MODEL
+
+    else:
+        calib_func = cv2.fisheye.calibrate
+        # the oty fisheye camera does not have skew/alpha and CALIB_RECOMPUTE_EXTRINSIC improves
+        # accuracy
+        flags = cv2.fisheye.CALIB_FIX_SKEW | cv2.fisheye.CALIB_RECOMPUTE_EXTRINSIC
+
+        # Fix initial intrinsic matrix if there are not enough GCPs to estimate all params (+4 is
+        # for 2 focal lengths (you can't fix fisheye aspect ratio) and 2 principal points).
+        # (Note that cv2.fisheye.calibrate() behaves differently to cv2.calibrateCamera(): it
+        # still runs with ttl_gcps < req_gcps, apparently fixing K and distortion coefficients.)
+        # TODO: cv2.fisheye.calibrate() seems to require a min of 5 GCPs.  confirm & change the
+        #  above check for that, and consider removing the flag changes below which seem to be
+        #  handled internally by cv2.fisheye.calibrate()
+        req_gcps = ceil((_frame_num_params[cam_type] + 4 + 1) / 2)
+        if ttl_gcps < req_gcps:
+            warnings.warn(
+                warn_str.format(req_gcps, cam_type, ttl_gcps),
+                category=OrthorityWarning,
+                stacklevel=2,
+            )
+            flags |= cv2.fisheye.CALIB_FIX_PRINCIPAL_POINT | cv2.fisheye.CALIB_FIX_FOCAL_LENGTH
+
+        # convert coords to cv2.fisheye format
+        xyzs = [xyz[None, :] for xyz in xyzs]
+        jis = [ji[None, :] for ji in jis]
+
+    # calibrate
+    err, K, dist_param, rs, ts = calib_func(
+        xyzs, jis, im_size, None, None, flags=flags, criteria=criteria
+    )
+    logger.debug(
+        f"RMS reprojection error for fit of '{cam_type}' model to {ttl_gcps} GCPs: {err:.4f}"
+    )
+
+    # convert opencv to oty format interior & exterior params
+    cam_id = f'{cam_type!r}_fit_to_{ttl_gcps}_gcps'
+    c_xy = (K[0, 2], K[1, 2]) - (np.array(im_size) - 1) / 2
+    c_xy /= max(im_size)
+    dist_param = dict(zip(_frame_dist_params[cam_type], dist_param.squeeze().tolist()))
+
+    int_param = dict(
+        cam_type=cam_type,
+        im_size=im_size,
+        focal_len=(K[0, 0], K[1, 1]),
+        sensor_size=(float(im_size[0]), float(im_size[1])),
+        cx=c_xy[0],
+        cy=c_xy[1],
+        **dist_param,
+    )
+    int_param_dict = {cam_id: int_param}
+
+    ext_param_dict = {}
+    for filename, t, r in zip(gcp_dict, ts, rs):
+        xyz, opk = param_io._cv_ext_to_oty_ext(t, r, ref_xyz=ref_xyz)
+        ext_param_dict[filename] = dict(xyz=xyz, opk=opk, camera=cam_id)
+
+    return int_param_dict, ext_param_dict
+
+
+def fit_frame_exterior(
+    int_param_dict: dict[str, dict[str, Any]],
+    gcp_dict: dict[str, Sequence[dict]],
+    crs: str | CRS | None = None,
+) -> dict[str, dict[str, Any]]:
+    """
+    Fit frame camera exterior parameters to GCPs, given the camera's interior parameters.
+
+    :param int_param_dict:
+        Interior parameter dictionary.  The first camera is used if it defines more than one.
+    :param gcp_dict:
+        GCP dictionary e.g. as returned by :func:`~orthority.param_io.read_im_gcps` or
+        :func:`~orthority.param_io.read_oty_gcps`.
+    :param crs:
+        CRS of the camera world coordinate system as an EPSG, proj4 or WKT string,
+        or :class:`~rasterio.crs.CRS` object.  If set to ``None`` (the default), GCPs are assumed
+        to be in the world coordinate CRS, and are not transformed.  Otherwise, GCPs are
+        transformed from geographic WGS84 coordinates to this CRS if it is supplied.
+
+    :return:
+        Exterior parameter dictionary.
+    """
+    if len(int_param_dict) > 1:
+        warnings.warn(
+            f"Refining the first of {len(int_param_dict)} cameras defined in the interior "
+            f"parameter dictionary.",
+            category=OrthorityWarning,
+            stacklevel=2,
+        )
+    cam_id, int_param = next(iter(int_param_dict.items()))
+
+    # check there are at least 3 GCPs per image
+    min_gcps = min(len(gcps) for gcps in gcp_dict.values())
+    if min_gcps < 3:
+        raise ValueError('At least three GCPs are needed per image.')
+
+    # get initial intrinsic matrix
+    K = FrameCamera._get_intrinsic(
+        int_param['im_size'],
+        int_param['focal_len'],
+        int_param.get('sensor_size'),
+        int_param.get('cx', 0.0),
+        int_param.get('cy', 0.0),
+    )
+
+    # get initial distortion coefficients
+    dist_names = _frame_dist_params[int_param['cam_type']]
+    dist_param = [int_param.get(dn, 0.0) for dn in dist_names]
+    dist_param = np.array(dist_param) if dist_param else None
+
+    # convert GCPs to OpenCV compatible lists of arrays
+    jis, xyzs, ref_xyz = _gcps_to_cv_coords(gcp_dict, crs=crs)
+
+    # fit exterior parameters (SOLVEPNP_SQPNP is globally optimal so does not need further refining)
+    # NOTE(review): solvePnP() applies the standard OpenCV distortion model - confirm for fisheye
+    ext_param_dict = {}
+    for filename, xyz, ji in zip(gcp_dict, xyzs, jis):
+        _, r, t = cv2.solvePnP(xyz, ji, K, dist_param, flags=cv2.SOLVEPNP_SQPNP)
+        xyz_, opk = param_io._cv_ext_to_oty_ext(t, r, ref_xyz=ref_xyz)
+        ext_param_dict[filename] = dict(xyz=xyz_, opk=opk, camera=cam_id)
+
+    return ext_param_dict
diff --git a/orthority/param_io.py b/orthority/param_io.py
@@ -365,7 +365,7 @@ def _read_im_rpc_param(
         rpc_param = dict(cam_type=CameraType.rpc, im_size=im_size, rpc=rpc.to_dict())
         # TODO: can filename be made to conform to actual case of the filename on the file
         #  system? otherwise, in windows the user can pass a different case filename here which
-        #  won't macth with GCPs when refining.
+        #  won't match with GCPs when refining.
         return {filename: rpc_param}
 
     # read RPC params in a thread pool, populating rpc_param_dict in same order as files
@@ -488,8 +488,8 @@ def _read_im_gcps(
         # https://gdal.org/user/raster_data_model.html#gcps.  This assumes image GCPs are in
         # center of pixel coordinate convention.
         oty_gcps = []
-        for gcp, xyz in zip(gcps, xyz.T):
-            gcp = dict(ji=(gcp.col, gcp.row), xyz=tuple(xyz.tolist()), id=gcp.id, info=gcp.info)
+        for gcp, xyz_ in zip(gcps, xyz.T):
+            gcp = dict(ji=(gcp.col, gcp.row), xyz=tuple(xyz_.tolist()), id=gcp.id, info=gcp.info)
             oty_gcps.append(gcp)
 
         return {filename: oty_gcps}
@@ -729,9 +729,9 @@ def _opk_to_rotation(opk: tuple[float, float, float]) -> np.ndarray:
 def _rotation_to_opk(R: np.ndarray) -> tuple[float, float, float]:
     """Convert the given rotation matrix to the (omega, phi, kappa) angles in radians."""
     # see https://s3.amazonaws.com/mics.pix4d.com/KB/documents/Pix4D_Yaw_Pitch_Roll_Omega_to_Phi_Kappa_angles_and_conversion.pdf
-    omega = np.arctan2(-R[1, 2], R[2, 2])
-    phi = np.arcsin(R[0, 2])
-    kappa = np.arctan2(-R[0, 1], R[0, 0])
+    omega = float(np.arctan2(-R[1, 2], R[2, 2]))
+    phi = float(np.arcsin(R[0, 2]))
+    kappa = float(np.arctan2(-R[0, 1], R[0, 0]))
     return omega, phi, kappa
 
 
@@ -835,6 +835,27 @@ def _rpy_to_opk(
     return omega, phi, kappa
 
 
+def _cv_ext_to_oty_ext(
+    t: Sequence[float] | np.ndarray,
+    r: Sequence[float] | np.ndarray,
+    ref_xyz: Sequence[float] | np.ndarray | None = None,
+) -> tuple[tuple[float, float, float], tuple[float, float, float]]:
+    """Return the Orthority camera (x, y, z) position and (omega, phi, kappa) angles for the
+    given OpenCV / OpenSfM translation ``t`` and rotation ``r`` vectors.  The position is
+    shifted by ``ref_xyz`` when one is given.
+    """
+    # adapted from ODM: https://github.com/OpenDroneMap/ODM/blob/master/opendm/shots.py
+    rot_mat = cv2.Rodrigues(np.array(r))[0]
+    R = rot_mat.T
+    cam_xyz = (-R.dot(t)).squeeze()
+    if ref_xyz is not None:
+        cam_xyz += ref_xyz
+    # flip the y & z axes to go from the OpenSfM / OpenCV to the PATB camera convention
+    cv_to_patb = np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]])
+    opk = _rotation_to_opk(R.dot(cv_to_patb))
+    return tuple(cam_xyz.tolist()), opk
+
+
 class FrameReader(ABC):
     """
     Base frame camera parameter reader.
@@ -1231,14 +1252,10 @@ def read_ext_param(self) -> dict[str, dict[str, Any]]:
 
         ext_param_dict = {}
         for filename, shot_dict in self._json_dict['shots'].items():
-            # convert  reconstruction 'translation' and 'rotation' to oty exterior params,
-            # adapted from ODM: https://github.com/OpenDroneMap/ODM/blob/master/opendm/shots.py
-            R = cv2.Rodrigues(np.array(shot_dict['rotation']))[0].T
-            delta_xyz = -R.dot(shot_dict['translation'])
-            xyz = tuple((ref_xyz + delta_xyz).tolist())
-            # rotate camera coords from OpenSfM / OpenCV to PATB convention
-            R_ = R.dot(np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]]))
-            opk = _rotation_to_opk(R_)
+            # convert reconstruction 'translation' and 'rotation' to oty exterior params
+            xyz, opk = _cv_ext_to_oty_ext(
+                shot_dict['translation'], shot_dict['rotation'], ref_xyz=ref_xyz
+            )
             cam_id = shot_dict['camera']
             cam_id = cam_id[3:] if cam_id.startswith('v2 ') else cam_id
             ext_param_dict[filename] = dict(xyz=xyz, opk=opk, camera=cam_id)
diff --git a/orthority/version.py b/orthority/version.py
@@ -13,4 +13,4 @@
 # You should have received a copy of the GNU Affero General Public License along with Orthority.
 # If not, see <https://www.gnu.org/licenses/>.
 
-__version__ = '0.5.1'
+__version__ = '0.6.0'
diff --git a/tests/test_fit.py b/tests/test_fit.py