Handlers API

Cloud Handler

Mapillary

Class to interact with Mapillary's API to fetch image data.

Parameters:

    mapillary_token (str): The authentication token for Mapillary. Required.

Examples:

>>> mapillary = Mapillary("YOUR_TOKEN_HERE")
>>> images = mapillary.fetch_within_bbox([12.34, 56.78, 90.12, 34.56])
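
A slightly fuller usage sketch, assuming the token is read from a hypothetical MAPILLARY_TOKEN environment variable and using a placeholder bounding box; this is an illustration, not part of the library:

import os

from landlensdb.handlers.cloud import Mapillary

# Assumption: the access token lives in an environment variable.
token = os.environ["MAPILLARY_TOKEN"]
mapillary = Mapillary(token)

# Fetch images for a small [west, south, east, north] bounding box.
images = mapillary.fetch_within_bbox([11.54, 48.14, 11.55, 48.15])
print(len(images), "images fetched")
print(images.columns.tolist())
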
Source code in landlensdb/handlers/cloud.py
class Mapillary:
    """
    Class to interact with Mapillary's API to fetch image data.

    Args:
        mapillary_token (str): The authentication token for Mapillary.

    Examples:
        >>> mapillary = Mapillary("YOUR_TOKEN_HERE")
        >>> images = mapillary.fetch_within_bbox([12.34, 56.78, 90.12, 34.56])
    """

    BASE_URL = "https://graph.mapillary.com"
    TILES_URL = "https://tiles.mapillary.com"
    REQUIRED_FIELDS = ["id", "geometry"]
    FIELDS_LIST = [
        "id",
        "altitude",
        "atomic_scale",
        "camera_parameters",
        "camera_type",
        "captured_at",
        "compass_angle",
        "computed_altitude",
        "computed_compass_angle",
        "computed_geometry",
        "computed_rotation",
        "exif_orientation",
        "geometry",
        "height",
        "thumb_1024_url",
        "merge_cc",
        "mesh",
        "sequence",
        "sfm_cluster",
        "width",
        "detections",
        "quality_score"  # Added quality score field
    ]

    QUALITY_INDICATORS = [
        "quality_score",  # Primary quality indicator
        "computed_compass_angle",  # Secondary indicator
        "atomic_scale"  # Tertiary indicator
    ]
    IMAGE_URL_KEYS = [
        "thumb_256_url",
        "thumb_1024_url",
        "thumb_2048_url",
        "thumb_original_url",
    ]
    LIMIT = 2000
    TF = TimezoneFinder()
    ZOOM_LEVEL = 14  # Default zoom level for coverage tiles

    def __init__(self, mapillary_token):
        """
        Initialize a Mapillary object.

        Args:
            mapillary_token (str): The authentication token for Mapillary.
        """
        self.TOKEN = mapillary_token

    def _validate_fields(self, fields):
        """
        Validates the fields for fetching data.

        Args:
            fields (list): The fields to be validated.

        Raises:
            ValueError: If the required fields are missing.
        """
        if (
            "id" not in fields
            or "geometry" not in fields
            or not any(image_field in fields for image_field in self.IMAGE_URL_KEYS)
        ):
            raise ValueError(
                "Fields must contain 'id', 'geometry', and one of "
                + str(self.IMAGE_URL_KEYS)
            )

    @staticmethod
    def _split_bbox(inner_bbox):
        """
        Splits a bounding box into four quarters.

        Args:
            inner_bbox (list): A list representing the bounding box to split.

        Returns:
            list: A list of four bounding boxes, each representing a quarter.
        """
        x1, y1, x2, y2 = inner_bbox[:]
        xm = (x2 - x1) / 2
        ym = (y2 - y1) / 2

        q1 = [x1, y1, x1 + xm, y1 + ym]
        q2 = [x1 + xm, y1, x2, y1 + ym]
        q3 = [x1, y1 + ym, x1 + xm, y2]
        q4 = [x1 + xm, y1 + ym, x2, y2]

        return [q1, q2, q3, q4]

    def _json_to_gdf(self, json_data):
        """
        Converts JSON data from Mapillary to a GeoDataFrame.

        Args:
            json_data (list): A list of JSON data from Mapillary.

        Returns:
            GeoDataFrame: A GeoDataFrame containing the image data.
        """
        # Early return if no data
        if not json_data:
            return GeoDataFrame(geometry=[])

        for img in json_data:
            # Basic field conversions
            coords = img.get("geometry", {}).get("coordinates", [None, None])
            img["geometry"] = Point(coords)
            img["mly_id"] = img.pop("id")
            img["name"] = f"mly|{img['mly_id']}"

            # Handle computed geometry
            if "computed_geometry" in img:
                coords = img.get("computed_geometry", {}).get(
                    "coordinates", [None, None]
                )
                img["computed_geometry"] = Point(coords)

            # Process timestamp with timezone
            if "captured_at" in img:
                lat = img["geometry"].y
                lng = img["geometry"].x
                img["captured_at"] = self._process_timestamp(
                    img.get("captured_at"), lat, lng
                )

            # Set image URL from available options
            image_url_found = False
            for key in self.IMAGE_URL_KEYS:
                if key in img:
                    img["image_url"] = str(img.pop(key))  # Explicitly convert to string
                    image_url_found = True
                    break

            # If no image URL was found, set a placeholder URL
            # Instead of using a direct Mapillary API URL that might fail,
            # we'll use a placeholder that indicates the image URL is missing
            if not image_url_found:
                img["image_url"] = f"placeholder://mapillary/{img['mly_id']}"

            # Convert list parameters to strings
            for key in ["camera_parameters", "computed_rotation"]:
                if key in img and isinstance(img[key], list):
                    img[key] = ",".join(map(str, img[key]))

            # Calculate quality score if not present
            if "quality_score" not in img:
                quality_score = 0.0
                if "computed_compass_angle" in img:
                    quality_score += 0.5  # Good compass data
                if "atomic_scale" in img:
                    quality_score += 0.3  # Good scale data
                if img.get("camera_type"):
                    quality_score += 0.2  # Camera type available
                img["quality_score"] = quality_score

        # Create GeoDataFrame
        gdf = GeoDataFrame(json_data, crs="EPSG:4326")
        gdf.set_geometry("geometry", inplace=True)

        # Sort by quality indicators and drop duplicates by sequence
        if "sequence" in gdf.columns:
            sort_columns = [col for col in self.QUALITY_INDICATORS if col in gdf.columns]
            if sort_columns:
                gdf = gdf.sort_values(sort_columns, ascending=False)
                gdf = gdf.drop_duplicates(subset=['sequence'], keep='first')

        # Ensure image_url is a string type
        if "image_url" in gdf.columns:
            gdf["image_url"] = gdf["image_url"].astype(str)

        return gdf

    def _bbox_to_tile_coords(self, bbox, zoom):
        """
        Convert a bounding box to tile coordinates at a given zoom level.

        Args:
            bbox (list): [west, south, east, north] coordinates
            zoom (int): Zoom level

        Returns:
            tuple: (min_x, min_y, max_x, max_y) tile coordinates
        """
        def lat_to_tile_y(lat_deg, zoom):
            lat_rad = math.radians(lat_deg)
            n = 2.0 ** zoom
            return int((1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * n)

        def lon_to_tile_x(lon_deg, zoom):
            n = 2.0 ** zoom
            return int((lon_deg + 180.0) / 360.0 * n)

        west, south, east, north = bbox
        min_x = lon_to_tile_x(west, zoom)
        max_x = lon_to_tile_x(east, zoom)
        min_y = lat_to_tile_y(north, zoom)  # Note: y coordinates are inverted
        max_y = lat_to_tile_y(south, zoom)

        return min_x, min_y, max_x, max_y

    def _tile_to_bbox(self, tile, zoom_level):
        """
        Converts tile coordinates to a bounding box.

        Args:
            tile (dict): Tile coordinates (x, y).
            zoom_level (int): The zoom level of the tile.

        Returns:
            list: Bounding box coordinates [west, south, east, north].
        """
        x, y = tile['x'], tile['y']
        n = 2.0 ** zoom_level
        west = x / n * 360.0 - 180.0
        east = (x + 1) / n * 360.0 - 180.0

        def inv_lat(y_tile):
            return math.degrees(math.atan(math.sinh(math.pi * (1 - 2 * y_tile / n))))

        north = inv_lat(y)
        south = inv_lat(y + 1)

        return [west, south, east, north]

    def _fetch_coverage_tile(self, zoom, x, y):
        """
        Fetches a single coverage tile.

        Args:
            zoom (int): Zoom level
            x (int): Tile X coordinate
            y (int): Tile Y coordinate

        Returns:
            list: Image features from the tile
        """
        url = (
            f"{self.TILES_URL}/maps/vtp/mly1_public/2"
            f"/{zoom}/{x}/{y}"
            f"?access_token={self.TOKEN}"
        )

        try:
            response = requests.get(url)
            if response.status_code == 200:
                # Vector tiles are binary, not JSON
                if 'application/x-protobuf' in response.headers.get('content-type', ''):
                    try:
                        # Decode the vector tile
                        tile_data = mapbox_vector_tile.decode(response.content)

                        # Check for image layer at zoom level 14
                        if 'image' in tile_data and zoom == 14:
                            return tile_data['image']['features']

                        # Check for sequence layer at zoom levels 6-14
                        elif 'sequence' in tile_data and 6 <= zoom <= 14:
                            return tile_data['sequence']['features']

                        # Check for overview layer at zoom levels 0-5
                        elif 'overview' in tile_data and 0 <= zoom <= 5:
                            return tile_data['overview']['features']

                        else:
                            warnings.warn(f"No usable layers found in tile {x},{y}")
                            return []

                    except Exception as e:
                        warnings.warn(f"Error decoding vector tile {x},{y}: {str(e)}")
                        return []
                else:
                    warnings.warn(f"Unexpected content type for tile {x},{y}")
                    return []
            else:
                warnings.warn(f"Error fetching tile {x},{y}: {response.status_code}")
                return []
        except Exception as e:
            warnings.warn(f"Exception fetching tile {x},{y}: {str(e)}")
            return []

    def _extract_image_ids_from_features(self, features):
        """
        Extracts image IDs from tile features.

        Args:
            features (list): List of features from a vector tile

        Returns:
            list: List of image IDs
        """
        image_ids = []

        for feature in features:
            if 'id' in feature.get('properties', {}):
                image_ids.append(str(feature['properties']['id']))
            elif 'image_id' in feature.get('properties', {}):
                image_ids.append(str(feature['properties']['image_id']))

        return image_ids

    def _fetch_image_metadata(self, image_ids, fields, max_workers=10):
        """
        Fetches metadata for multiple images using multi-threading.

        Args:
            image_ids (list): List of image IDs
            fields (list): Fields to include in the response
            max_workers (int, optional): Maximum number of concurrent workers. Default is 10.

        Returns:
            list: List of image metadata
        """
        results = []

        def fetch_single_image(image_id):
            url = (
                f"{self.BASE_URL}/{image_id}"
                f"?access_token={self.TOKEN}"
                f"&fields={','.join(fields)}"
            )

            try:
                response = requests.get(url)
                if response.status_code == 200:
                    return response.json()
                else:
                    warnings.warn(f"Error fetching image {image_id}: {response.status_code}")
                    return None
            except Exception as e:
                warnings.warn(f"Exception fetching image {image_id}: {str(e)}")
                return None

        # Use ThreadPoolExecutor for parallel fetching
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Submit all tasks and create a map of future to image_id
            future_to_id = {executor.submit(fetch_single_image, image_id): image_id
                           for image_id in image_ids}

            # Process results as they complete with a progress bar
            for future in tqdm(as_completed(future_to_id),
                              total=len(image_ids),
                              desc="Fetching metadata"):
                result = future.result()
                if result:
                    results.append(result)

        return results

    def fetch_within_bbox(
        self,
        initial_bbox,
        start_date=None,
        end_date=None,
        fields=None,
        max_recursion_depth=25,
        use_coverage_tiles=True,
        max_images=5000,
        max_workers=10
    ):
        """
        Fetches images within a bounding box.

        Args:
            initial_bbox (list): The bounding box to fetch images from [west, south, east, north].
            start_date (str, optional): Start date for filtering images (YYYY-MM-DD).
            end_date (str, optional): End date for filtering images (YYYY-MM-DD).
            fields (list, optional): Fields to include in the response.
            max_recursion_depth (int, optional): Maximum depth for recursive fetching.
            use_coverage_tiles (bool, optional): Whether to use coverage tiles API for large areas.
            max_images (int, optional): Maximum number of images to process. Default is 5000.
            max_workers (int, optional): Maximum number of concurrent workers. Default is 10.

        Returns:
            GeoImageFrame: A GeoImageFrame containing the image data.
        """
        if fields is None:
            fields = self.FIELDS_LIST

        # Ensure required fields are included
        if "id" not in fields:
            fields.append("id")
        if "geometry" not in fields:
            fields.append("geometry")
        if not any(url_key in fields for url_key in self.IMAGE_URL_KEYS):
            fields.append("thumb_1024_url")

        start_timestamp = self._get_timestamp(start_date) if start_date else None
        end_timestamp = self._get_timestamp(end_date, True) if end_date else None

        if use_coverage_tiles:
            # Get coverage tiles for the area
            min_x, min_y, max_x, max_y = self._bbox_to_tile_coords(initial_bbox, self.ZOOM_LEVEL)

            all_image_ids = []
            print(f"Fetching {(max_x - min_x + 1) * (max_y - min_y + 1)} tiles...")

            # Fetch all tiles in the bounding box
            for x in range(min_x, max_x + 1):
                for y in range(min_y, max_y + 1):
                    features = self._fetch_coverage_tile(self.ZOOM_LEVEL, x, y)
                    image_ids = self._extract_image_ids_from_features(features)
                    all_image_ids.extend(image_ids)

                    # Check if we've reached the maximum number of images
                    if len(all_image_ids) >= max_images * 2:  # Get more than needed to allow for filtering
                        print(f"Reached maximum number of images ({max_images}), stopping tile fetching")
                        break

                # Check again after processing a row of tiles
                if len(all_image_ids) >= max_images * 2:
                    break

            print(f"Found {len(all_image_ids)} total images")

            # Remove duplicates
            all_image_ids = list(set(all_image_ids))
            print(f"After removing duplicates: {len(all_image_ids)} unique images")

            # Limit the number of images to process
            if len(all_image_ids) > max_images:
                print(f"Limiting to {max_images} images for processing")
                all_image_ids = all_image_ids[:max_images]

            # Fetch metadata for all images using multi-threading
            all_data = self._fetch_image_metadata(all_image_ids, fields, max_workers=max_workers)

            data = self._json_to_gdf(all_data)
            return GeoImageFrame(data, geometry="geometry")
        else:
            # Use traditional recursive fetching
            data = self._recursive_fetch(
                initial_bbox,
                fields,
                start_timestamp,
                end_timestamp,
                max_recursion_depth=max_recursion_depth
            )
            gdf = self._json_to_gdf(data)
            return GeoImageFrame(gdf, geometry="geometry")

    def fetch_by_id(self, image_id, fields=None):
        """
        Fetches an image by its ID.

        Args:
            image_id (str): The ID of the image to fetch.
            fields (list, optional): The fields to include in the response.

        Returns:
            GeoImageFrame: A GeoImageFrame containing the fetched image.

        Raises:
            Exception: If the connection to Mapillary API fails.
        """
        if fields is None:
            fields = self.FIELDS_LIST
        else:
            self._validate_fields(fields)
        url = (
            f"{self.BASE_URL}/{image_id}"
            f"?access_token={self.TOKEN}"
            f"&fields={','.join(fields)}"
        )
        response = requests.get(url)
        if response.status_code != 200:
            raise Exception(
                f"Error connecting to Mapillary API. Exception: {response.text}"
            )
        data = self._json_to_gdf([response.json()])
        return GeoImageFrame(data, geometry="geometry")

    def fetch_by_sequence(self, sequence_ids, fields=None):
        """
        Fetches images by their sequence IDs.

        Args:
            sequence_ids (list): The sequence IDs to fetch images from.
            fields (list, optional): The fields to include in the response.

        Returns:
            GeoImageFrame: A GeoImageFrame containing the fetched images.

        Raises:
            Exception: If the connection to Mapillary API fails.
        """
        if fields is None:
            fields = self.FIELDS_LIST
        else:
            self._validate_fields(fields)
        url = (
            f"{self.BASE_URL}/images"
            f"?access_token={self.TOKEN}"
            f"&sequence_ids={','.join(sequence_ids)}"
            f"&fields={','.join(fields)}"
        )
        response = requests.get(url)
        if response.status_code != 200:
            raise Exception(
                f"Error connecting to Mapillary API. Exception: {response.text}"
            )
        response_data = response.json().get("data")
        if len(response_data) == self.LIMIT:
            raise Exception(
                "Data count reached the limit. Please provide fewer sequence IDs."
            )

        data = self._json_to_gdf(response_data)
        return GeoImageFrame(data, geometry="geometry")

    @staticmethod
    def _get_timestamp(date_string, end_of_day=False):
        """
        Converts a date string to a timestamp.

        Args:
            date_string (str): The date string to convert.
            end_of_day (bool, optional): Whether to set the timestamp to the end of the day.

        Returns:
            str: The timestamp corresponding to the date string.
        """
        if not date_string:
            return None

        tz = timezone.utc
        dt = datetime.strptime(date_string, "%Y-%m-%d")
        if end_of_day:
            dt = dt.replace(hour=23, minute=59, second=59)
        timestamp = (
            dt.astimezone(tz).replace(microsecond=0).isoformat().replace("+00:00", "Z")
        )
        return timestamp

    def _process_timestamp(self, epoch_time_ms, lat, lng):
        """
        Converts the given epoch time in milliseconds to an ISO-formatted timestamp adjusted to the local timezone
        based on the provided latitude and longitude coordinates.

        Args:
            epoch_time_ms (int): Epoch time in milliseconds.
            lat (float): Latitude coordinate for the timezone conversion.
            lng (float): Longitude coordinate for the timezone conversion.

        Returns:
            str: An ISO-formatted timestamp in the local timezone if timezone information is found, otherwise in UTC.

        Example:
            >>> _process_timestamp(1630456103000, 37.7749, -122.4194)
            '2021-09-01T09:55:03-07:00'
        """
        if not epoch_time_ms:
            return None
        epoch_time = epoch_time_ms / 1000
        dt_utc = datetime.fromtimestamp(epoch_time, tz=timezone.utc)

        tz_name = self.TF.timezone_at(lat=lat, lng=lng)
        if tz_name:
            local_tz = pytz.timezone(tz_name)
            return dt_utc.astimezone(local_tz).isoformat()
        else:
            return dt_utc.isoformat()

    def _recursive_fetch(
        self,
        bbox,
        fields,
        start_timestamp=None,
        end_timestamp=None,
        current_depth=0,
        max_recursion_depth=None,
    ):
        """
        Recursively fetches images within a bounding box, considering timestamps.

        Args:
            bbox (list): The bounding box to fetch images from.
            fields (list): The fields to include in the response.
            start_timestamp (str, optional): The starting timestamp for filtering images.
            end_timestamp (str, optional): The ending timestamp for filtering images.
            current_depth (int, optional): Current depth of recursion.
            max_recursion_depth (int, optional): Maximum depth of recursion.

        Returns:
            list: A list of image data.

        Raises:
            Exception: If the connection to Mapillary API fails.
        """
        if max_recursion_depth is not None and current_depth > max_recursion_depth:
            warnings.warn(
                "Max recursion depth reached. Consider splitting requests."
            )
            return []

        url = (
            f"{self.BASE_URL}/images"
            f"?access_token={self.TOKEN}"
            f"&fields={','.join(fields)}"
            f"&bbox={','.join(str(i) for i in bbox)}"
            f"&limit={self.LIMIT}"
        )

        if start_timestamp:
            url += f"&start_captured_at={start_timestamp}"
        if end_timestamp:
            url += f"&end_captured_at={end_timestamp}"

        response = requests.get(url)
        if response.status_code != 200:
            raise Exception(
                f"Error connecting to Mapillary API. Exception: {response.text}"
            )

        response_data = response.json().get("data")
        if len(response_data) == self.LIMIT:
            child_bboxes = self._split_bbox(bbox)
            data = []
            for child_bbox in child_bboxes:
                data.extend(
                    self._recursive_fetch(
                        child_bbox,
                        fields,
                        start_timestamp,
                        end_timestamp,
                        current_depth=current_depth + 1,
                        max_recursion_depth=max_recursion_depth,
                    )
                )
            return data
        else:
            return response_data

__init__(mapillary_token)

Initialize a Mapillary object.

Parameters:

    mapillary_token (str): The authentication token for Mapillary. Required.

Source code in landlensdb/handlers/cloud.py
def __init__(self, mapillary_token):
    """
    Initialize a Mapillary object.

    Args:
        mapillary_token (str): The authentication token for Mapillary.
    """
    self.TOKEN = mapillary_token

_bbox_to_tile_coords(bbox, zoom)

Convert a bounding box to tile coordinates at a given zoom level.

Parameters:

    bbox (list): [west, south, east, north] coordinates. Required.
    zoom (int): Zoom level. Required.

Returns:

    tuple: (min_x, min_y, max_x, max_y) tile coordinates.

Source code in landlensdb/handlers/cloud.py
def _bbox_to_tile_coords(self, bbox, zoom):
    """
    Convert a bounding box to tile coordinates at a given zoom level.

    Args:
        bbox (list): [west, south, east, north] coordinates
        zoom (int): Zoom level

    Returns:
        tuple: (min_x, min_y, max_x, max_y) tile coordinates
    """
    def lat_to_tile_y(lat_deg, zoom):
        lat_rad = math.radians(lat_deg)
        n = 2.0 ** zoom
        return int((1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * n)

    def lon_to_tile_x(lon_deg, zoom):
        n = 2.0 ** zoom
        return int((lon_deg + 180.0) / 360.0 * n)

    west, south, east, north = bbox
    min_x = lon_to_tile_x(west, zoom)
    max_x = lon_to_tile_x(east, zoom)
    min_y = lat_to_tile_y(north, zoom)  # Note: y coordinates are inverted
    max_y = lat_to_tile_y(south, zoom)

    return min_x, min_y, max_x, max_y
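
For intuition, here is a standalone sketch of the same XYZ ("slippy map") tile arithmetic that this helper implements, run on a small placeholder bounding box:

import math

def lon_to_tile_x(lon_deg, zoom):
    # Standard XYZ tile column index.
    return int((lon_deg + 180.0) / 360.0 * 2.0 ** zoom)

def lat_to_tile_y(lat_deg, zoom):
    # Standard XYZ tile row index (rows grow towards the south).
    lat_rad = math.radians(lat_deg)
    return int((1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * 2.0 ** zoom)

# A small bbox around central Berlin, [west, south, east, north].
west, south, east, north = 13.37, 52.51, 13.41, 52.53
zoom = 14
min_x, max_x = lon_to_tile_x(west, zoom), lon_to_tile_x(east, zoom)
min_y, max_y = lat_to_tile_y(north, zoom), lat_to_tile_y(south, zoom)
print((max_x - min_x + 1) * (max_y - min_y + 1), "tiles cover the bbox at zoom", zoom)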

_extract_image_ids_from_features(features)

Extracts image IDs from tile features.

Parameters:

    features (list): List of features from a vector tile. Required.

Returns:

    list: List of image IDs.

Source code in landlensdb/handlers/cloud.py
def _extract_image_ids_from_features(self, features):
    """
    Extracts image IDs from tile features.

    Args:
        features (list): List of features from a vector tile

    Returns:
        list: List of image IDs
    """
    image_ids = []

    for feature in features:
        if 'id' in feature.get('properties', {}):
            image_ids.append(str(feature['properties']['id']))
        elif 'image_id' in feature.get('properties', {}):
            image_ids.append(str(feature['properties']['image_id']))

    return image_ids

_fetch_coverage_tile(zoom, x, y)

Fetches a single coverage tile.

Parameters:

    zoom (int): Zoom level. Required.
    x (int): Tile X coordinate. Required.
    y (int): Tile Y coordinate. Required.

Returns:

    list: Image features from the tile.

Source code in landlensdb/handlers/cloud.py
def _fetch_coverage_tile(self, zoom, x, y):
    """
    Fetches a single coverage tile.

    Args:
        zoom (int): Zoom level
        x (int): Tile X coordinate
        y (int): Tile Y coordinate

    Returns:
        list: Image features from the tile
    """
    url = (
        f"{self.TILES_URL}/maps/vtp/mly1_public/2"
        f"/{zoom}/{x}/{y}"
        f"?access_token={self.TOKEN}"
    )

    try:
        response = requests.get(url)
        if response.status_code == 200:
            # Vector tiles are binary, not JSON
            if 'application/x-protobuf' in response.headers.get('content-type', ''):
                try:
                    # Decode the vector tile
                    tile_data = mapbox_vector_tile.decode(response.content)

                    # Check for image layer at zoom level 14
                    if 'image' in tile_data and zoom == 14:
                        return tile_data['image']['features']

                    # Check for sequence layer at zoom levels 6-14
                    elif 'sequence' in tile_data and 6 <= zoom <= 14:
                        return tile_data['sequence']['features']

                    # Check for overview layer at zoom levels 0-5
                    elif 'overview' in tile_data and 0 <= zoom <= 5:
                        return tile_data['overview']['features']

                    else:
                        warnings.warn(f"No usable layers found in tile {x},{y}")
                        return []

                except Exception as e:
                    warnings.warn(f"Error decoding vector tile {x},{y}: {str(e)}")
                    return []
            else:
                warnings.warn(f"Unexpected content type for tile {x},{y}")
                return []
        else:
            warnings.warn(f"Error fetching tile {x},{y}: {response.status_code}")
            return []
    except Exception as e:
        warnings.warn(f"Exception fetching tile {x},{y}: {str(e)}")
        return []
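
A minimal sketch of the decoding step in isolation, assuming the raw protobuf body of a coverage tile is already in hand; the 'image', 'sequence', and 'overview' layer names are the ones checked above:

import mapbox_vector_tile

def list_tile_layers(tile_bytes):
    # Decode the Mapbox Vector Tile into a dict keyed by layer name.
    tile_data = mapbox_vector_tile.decode(tile_bytes)
    # Each layer holds a list of features, each with 'geometry' and 'properties'.
    for layer_name, layer in tile_data.items():
        print(layer_name, len(layer.get("features", [])), "features")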

_fetch_image_metadata(image_ids, fields, max_workers=10)

Fetches metadata for multiple images using multi-threading.

Parameters:

    image_ids (list): List of image IDs. Required.
    fields (list): Fields to include in the response. Required.
    max_workers (int): Maximum number of concurrent workers. Default: 10.

Returns:

    list: List of image metadata.

Source code in landlensdb/handlers/cloud.py
def _fetch_image_metadata(self, image_ids, fields, max_workers=10):
    """
    Fetches metadata for multiple images using multi-threading.

    Args:
        image_ids (list): List of image IDs
        fields (list): Fields to include in the response
        max_workers (int, optional): Maximum number of concurrent workers. Default is 10.

    Returns:
        list: List of image metadata
    """
    results = []

    def fetch_single_image(image_id):
        url = (
            f"{self.BASE_URL}/{image_id}"
            f"?access_token={self.TOKEN}"
            f"&fields={','.join(fields)}"
        )

        try:
            response = requests.get(url)
            if response.status_code == 200:
                return response.json()
            else:
                warnings.warn(f"Error fetching image {image_id}: {response.status_code}")
                return None
        except Exception as e:
            warnings.warn(f"Exception fetching image {image_id}: {str(e)}")
            return None

    # Use ThreadPoolExecutor for parallel fetching
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit all tasks and create a map of future to image_id
        future_to_id = {executor.submit(fetch_single_image, image_id): image_id
                       for image_id in image_ids}

        # Process results as they complete with a progress bar
        for future in tqdm(as_completed(future_to_id),
                          total=len(image_ids),
                          desc="Fetching metadata"):
            result = future.result()
            if result:
                results.append(result)

    return results

_get_timestamp(date_string, end_of_day=False) staticmethod

Converts a date string to a timestamp.

Parameters:

    date_string (str): The date string to convert. Required.
    end_of_day (bool): Whether to set the timestamp to the end of the day. Default: False.

Returns:

    str: The timestamp corresponding to the date string.

Source code in landlensdb/handlers/cloud.py
@staticmethod
def _get_timestamp(date_string, end_of_day=False):
    """
    Converts a date string to a timestamp.

    Args:
        date_string (str): The date string to convert.
        end_of_day (bool, optional): Whether to set the timestamp to the end of the day.

    Returns:
        str: The timestamp corresponding to the date string.
    """
    if not date_string:
        return None

    tz = timezone.utc
    dt = datetime.strptime(date_string, "%Y-%m-%d")
    if end_of_day:
        dt = dt.replace(hour=23, minute=59, second=59)
    timestamp = (
        dt.astimezone(tz).replace(microsecond=0).isoformat().replace("+00:00", "Z")
    )
    return timestamp
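
Example output on a machine whose local timezone is UTC (the parsed datetime is naive, so astimezone() interprets it in the system's local timezone before converting; on other machines the time component shifts accordingly):

    >>> Mapillary._get_timestamp("2023-05-01")
    '2023-05-01T00:00:00Z'
    >>> Mapillary._get_timestamp("2023-05-01", end_of_day=True)
    '2023-05-01T23:59:59Z'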

_json_to_gdf(json_data)

Converts JSON data from Mapillary to a GeoDataFrame.

Parameters:

    json_data (list): A list of JSON data from Mapillary. Required.

Returns:

    GeoDataFrame: A GeoDataFrame containing the image data.

Source code in landlensdb/handlers/cloud.py
def _json_to_gdf(self, json_data):
    """
    Converts JSON data from Mapillary to a GeoDataFrame.

    Args:
        json_data (list): A list of JSON data from Mapillary.

    Returns:
        GeoDataFrame: A GeoDataFrame containing the image data.
    """
    # Early return if no data
    if not json_data:
        return GeoDataFrame(geometry=[])

    for img in json_data:
        # Basic field conversions
        coords = img.get("geometry", {}).get("coordinates", [None, None])
        img["geometry"] = Point(coords)
        img["mly_id"] = img.pop("id")
        img["name"] = f"mly|{img['mly_id']}"

        # Handle computed geometry
        if "computed_geometry" in img:
            coords = img.get("computed_geometry", {}).get(
                "coordinates", [None, None]
            )
            img["computed_geometry"] = Point(coords)

        # Process timestamp with timezone
        if "captured_at" in img:
            lat = img["geometry"].y
            lng = img["geometry"].x
            img["captured_at"] = self._process_timestamp(
                img.get("captured_at"), lat, lng
            )

        # Set image URL from available options
        image_url_found = False
        for key in self.IMAGE_URL_KEYS:
            if key in img:
                img["image_url"] = str(img.pop(key))  # Explicitly convert to string
                image_url_found = True
                break

        # If no image URL was found, set a placeholder URL
        # Instead of using a direct Mapillary API URL that might fail,
        # we'll use a placeholder that indicates the image URL is missing
        if not image_url_found:
            img["image_url"] = f"placeholder://mapillary/{img['mly_id']}"

        # Convert list parameters to strings
        for key in ["camera_parameters", "computed_rotation"]:
            if key in img and isinstance(img[key], list):
                img[key] = ",".join(map(str, img[key]))

        # Calculate quality score if not present
        if "quality_score" not in img:
            quality_score = 0.0
            if "computed_compass_angle" in img:
                quality_score += 0.5  # Good compass data
            if "atomic_scale" in img:
                quality_score += 0.3  # Good scale data
            if img.get("camera_type"):
                quality_score += 0.2  # Camera type available
            img["quality_score"] = quality_score

    # Create GeoDataFrame
    gdf = GeoDataFrame(json_data, crs="EPSG:4326")
    gdf.set_geometry("geometry", inplace=True)

    # Sort by quality indicators and drop duplicates by sequence
    if "sequence" in gdf.columns:
        sort_columns = [col for col in self.QUALITY_INDICATORS if col in gdf.columns]
        if sort_columns:
            gdf = gdf.sort_values(sort_columns, ascending=False)
            gdf = gdf.drop_duplicates(subset=['sequence'], keep='first')

    # Ensure image_url is a string type
    if "image_url" in gdf.columns:
        gdf["image_url"] = gdf["image_url"].astype(str)

    return gdf
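
For illustration only, a single minimal record run through this conversion offline (no network access is needed; the field values are made up, and calling the private method directly is just for demonstration):

from landlensdb.handlers.cloud import Mapillary

record = {
    "id": "123456789",
    "geometry": {"type": "Point", "coordinates": [13.40, 52.52]},
    "thumb_1024_url": "https://example.com/thumb.jpg",
    "camera_type": "perspective",
}
gdf = Mapillary("DUMMY_TOKEN")._json_to_gdf([record])
# Expect mly_id, name, image_url and a derived quality_score alongside the Point geometry.
print(gdf[["mly_id", "name", "image_url", "quality_score"]])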

_process_timestamp(epoch_time_ms, lat, lng)

Converts the given epoch time in milliseconds to an ISO-formatted timestamp adjusted to the local timezone based on the provided latitude and longitude coordinates.

Parameters:

    epoch_time_ms (int): Epoch time in milliseconds. Required.
    lat (float): Latitude coordinate for the timezone conversion. Required.
    lng (float): Longitude coordinate for the timezone conversion. Required.

Returns:

    str: An ISO-formatted timestamp in the local timezone if timezone information is found, otherwise in UTC.

Example:

    >>> _process_timestamp(1630456103000, 37.7749, -122.4194)
    '2021-09-01T09:55:03-07:00'

Source code in landlensdb/handlers/cloud.py
def _process_timestamp(self, epoch_time_ms, lat, lng):
    """
    Converts the given epoch time in milliseconds to an ISO-formatted timestamp adjusted to the local timezone
    based on the provided latitude and longitude coordinates.

    Args:
        epoch_time_ms (int): Epoch time in milliseconds.
        lat (float): Latitude coordinate for the timezone conversion.
        lng (float): Longitude coordinate for the timezone conversion.

    Returns:
        str: An ISO-formatted timestamp in the local timezone if timezone information is found, otherwise in UTC.

    Example:
        >>> _process_timestamp(1630456103000, 37.7749, -122.4194)
        '2021-09-01T09:55:03-07:00'
    """
    if not epoch_time_ms:
        return None
    epoch_time = epoch_time_ms / 1000
    dt_utc = datetime.fromtimestamp(epoch_time, tz=timezone.utc)

    tz_name = self.TF.timezone_at(lat=lat, lng=lng)
    if tz_name:
        local_tz = pytz.timezone(tz_name)
        return dt_utc.astimezone(local_tz).isoformat()
    else:
        return dt_utc.isoformat()

_recursive_fetch(bbox, fields, start_timestamp=None, end_timestamp=None, current_depth=0, max_recursion_depth=None)

Recursively fetches images within a bounding box, considering timestamps.

Parameters:

    bbox (list): The bounding box to fetch images from. Required.
    fields (list): The fields to include in the response. Required.
    start_timestamp (str): The starting timestamp for filtering images. Default: None.
    end_timestamp (str): The ending timestamp for filtering images. Default: None.
    current_depth (int): Current depth of recursion. Default: 0.
    max_recursion_depth (int): Maximum depth of recursion. Default: None.

Returns:

    list: A list of image data.

Raises:

    Exception: If the connection to Mapillary API fails.

Source code in landlensdb/handlers/cloud.py
def _recursive_fetch(
    self,
    bbox,
    fields,
    start_timestamp=None,
    end_timestamp=None,
    current_depth=0,
    max_recursion_depth=None,
):
    """
    Recursively fetches images within a bounding box, considering timestamps.

    Args:
        bbox (list): The bounding box to fetch images from.
        fields (list): The fields to include in the response.
        start_timestamp (str, optional): The starting timestamp for filtering images.
        end_timestamp (str, optional): The ending timestamp for filtering images.
        current_depth (int, optional): Current depth of recursion.
        max_recursion_depth (int, optional): Maximum depth of recursion.

    Returns:
        list: A list of image data.

    Raises:
        Exception: If the connection to Mapillary API fails.
    """
    if max_recursion_depth is not None and current_depth > max_recursion_depth:
        warnings.warn(
            "Max recursion depth reached. Consider splitting requests."
        )
        return []

    url = (
        f"{self.BASE_URL}/images"
        f"?access_token={self.TOKEN}"
        f"&fields={','.join(fields)}"
        f"&bbox={','.join(str(i) for i in bbox)}"
        f"&limit={self.LIMIT}"
    )

    if start_timestamp:
        url += f"&start_captured_at={start_timestamp}"
    if end_timestamp:
        url += f"&end_captured_at={end_timestamp}"

    response = requests.get(url)
    if response.status_code != 200:
        raise Exception(
            f"Error connecting to Mapillary API. Exception: {response.text}"
        )

    response_data = response.json().get("data")
    if len(response_data) == self.LIMIT:
        child_bboxes = self._split_bbox(bbox)
        data = []
        for child_bbox in child_bboxes:
            data.extend(
                self._recursive_fetch(
                    child_bbox,
                    fields,
                    start_timestamp,
                    end_timestamp,
                    current_depth=current_depth + 1,
                    max_recursion_depth=max_recursion_depth,
                )
            )
        return data
    else:
        return response_data

_split_bbox(inner_bbox) staticmethod

Splits a bounding box into four quarters.

Parameters:

    inner_bbox (list): A list representing the bounding box to split. Required.

Returns:

    list: A list of four bounding boxes, each representing a quarter.

Source code in landlensdb/handlers/cloud.py
@staticmethod
def _split_bbox(inner_bbox):
    """
    Splits a bounding box into four quarters.

    Args:
        inner_bbox (list): A list representing the bounding box to split.

    Returns:
        list: A list of four bounding boxes, each representing a quarter.
    """
    x1, y1, x2, y2 = inner_bbox[:]
    xm = (x2 - x1) / 2
    ym = (y2 - y1) / 2

    q1 = [x1, y1, x1 + xm, y1 + ym]
    q2 = [x1 + xm, y1, x2, y1 + ym]
    q3 = [x1, y1 + ym, x1 + xm, y2]
    q4 = [x1 + xm, y1 + ym, x2, y2]

    return [q1, q2, q3, q4]
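
A quick check of the quartering on a simple box (illustration of the private helper only):

    >>> Mapillary._split_bbox([0.0, 0.0, 2.0, 2.0])
    [[0.0, 0.0, 1.0, 1.0], [1.0, 0.0, 2.0, 1.0], [0.0, 1.0, 1.0, 2.0], [1.0, 1.0, 2.0, 2.0]]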

_tile_to_bbox(tile, zoom_level)

Converts tile coordinates to a bounding box.

Parameters:

    tile (dict): Tile coordinates (x, y). Required.
    zoom_level (int): The zoom level of the tile. Required.

Returns:

    list: Bounding box coordinates [west, south, east, north].

Source code in landlensdb/handlers/cloud.py
def _tile_to_bbox(self, tile, zoom_level):
    """
    Converts tile coordinates to a bounding box.

    Args:
        tile (dict): Tile coordinates (x, y).
        zoom_level (int): The zoom level of the tile.

    Returns:
        list: Bounding box coordinates [west, south, east, north].
    """
    x, y = tile['x'], tile['y']
    n = 2.0 ** zoom_level
    west = x / n * 360.0 - 180.0
    east = (x + 1) / n * 360.0 - 180.0

    def inv_lat(y_tile):
        return math.degrees(math.atan(math.sinh(math.pi * (1 - 2 * y_tile / n))))

    north = inv_lat(y)
    south = inv_lat(y + 1)

    return [west, south, east, north]
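
As a sanity check, the top-left tile at zoom 1 covers the north-western quarter of the Web Mercator world, roughly [-180, 0, 0, 85.0511] (illustration of the private helper only; the token is a dummy):

from landlensdb.handlers.cloud import Mapillary

bbox = Mapillary("DUMMY_TOKEN")._tile_to_bbox({"x": 0, "y": 0}, 1)
print([round(v, 4) for v in bbox])  # [-180.0, 0.0, 0.0, 85.0511]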

_validate_fields(fields)

Validates the fields for fetching data.

Parameters:

    fields (list): The fields to be validated. Required.

Raises:

    ValueError: If the required fields are missing.

Source code in landlensdb/handlers/cloud.py
def _validate_fields(self, fields):
    """
    Validates the fields for fetching data.

    Args:
        fields (list): The fields to be validated.

    Raises:
        ValueError: If the required fields are missing.
    """
    if (
        "id" not in fields
        or "geometry" not in fields
        or not any(image_field in fields for image_field in self.IMAGE_URL_KEYS)
    ):
        raise ValueError(
            "Fields must contain 'id', 'geometry', and one of "
            + str(self.IMAGE_URL_KEYS)
        )
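
For example, a field list passes validation only if it names 'id', 'geometry', and at least one of the thumbnail URL keys (illustration only; the token is a dummy):

from landlensdb.handlers.cloud import Mapillary

m = Mapillary("DUMMY_TOKEN")
m._validate_fields(["id", "geometry", "thumb_1024_url"])  # passes silently

try:
    m._validate_fields(["id", "geometry"])  # no thumbnail URL key present
except ValueError as err:
    print(err)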

fetch_by_id(image_id, fields=None)

Fetches an image by its ID.

Parameters:

    image_id (str): The ID of the image to fetch. Required.
    fields (list): The fields to include in the response. Default: None.

Returns:

    GeoImageFrame: A GeoImageFrame containing the fetched image.

Raises:

    Exception: If the connection to Mapillary API fails.

Source code in landlensdb/handlers/cloud.py
def fetch_by_id(self, image_id, fields=None):
    """
    Fetches an image by its ID.

    Args:
        image_id (str): The ID of the image to fetch.
        fields (list, optional): The fields to include in the response.

    Returns:
        GeoImageFrame: A GeoImageFrame containing the fetched image.

    Raises:
        Exception: If the connection to Mapillary API fails.
    """
    if fields is None:
        fields = self.FIELDS_LIST
    else:
        self._validate_fields(fields)
    url = (
        f"{self.BASE_URL}/{image_id}"
        f"?access_token={self.TOKEN}"
        f"&fields={','.join(fields)}"
    )
    response = requests.get(url)
    if response.status_code != 200:
        raise Exception(
            f"Error connecting to Mapillary API. Exception: {response.text}"
        )
    data = self._json_to_gdf([response.json()])
    return GeoImageFrame(data, geometry="geometry")
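
A hedged usage sketch; the image ID is a placeholder and the call performs a live API request:

mapillary = Mapillary("YOUR_TOKEN_HERE")
frame = mapillary.fetch_by_id("123456789012345")
print(frame[["mly_id", "image_url"]])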

fetch_by_sequence(sequence_ids, fields=None)

Fetches images by their sequence IDs.

Parameters:

    sequence_ids (list): The sequence IDs to fetch images from. Required.
    fields (list): The fields to include in the response. Default: None.

Returns:

    GeoImageFrame: A GeoImageFrame containing the fetched images.

Raises:

    Exception: If the connection to Mapillary API fails.

Source code in landlensdb/handlers/cloud.py
def fetch_by_sequence(self, sequence_ids, fields=None):
    """
    Fetches images by their sequence IDs.

    Args:
        sequence_ids (list): The sequence IDs to fetch images from.
        fields (list, optional): The fields to include in the response.

    Returns:
        GeoImageFrame: A GeoImageFrame containing the fetched images.

    Raises:
        Exception: If the connection to Mapillary API fails.
    """
    if fields is None:
        fields = self.FIELDS_LIST
    else:
        self._validate_fields(fields)
    url = (
        f"{self.BASE_URL}/images"
        f"?access_token={self.TOKEN}"
        f"&sequence_ids={','.join(sequence_ids)}"
        f"&fields={','.join(fields)}"
    )
    response = requests.get(url)
    if response.status_code != 200:
        raise Exception(
            f"Error connecting to Mapillary API. Exception: {response.text}"
        )
    response_data = response.json().get("data")
    if len(response_data) == self.LIMIT:
        raise Exception(
            "Data count reached the limit. Please provide fewer sequence IDs."
        )

    data = self._json_to_gdf(response_data)
    return GeoImageFrame(data, geometry="geometry")
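
A hedged usage sketch; the sequence IDs are placeholders and the call performs a live API request:

mapillary = Mapillary("YOUR_TOKEN_HERE")
frame = mapillary.fetch_by_sequence(["SEQUENCE_ID_1", "SEQUENCE_ID_2"])
print(frame[["mly_id", "sequence", "image_url"]].head())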

fetch_within_bbox(initial_bbox, start_date=None, end_date=None, fields=None, max_recursion_depth=25, use_coverage_tiles=True, max_images=5000, max_workers=10)

Fetches images within a bounding box.

Parameters:

    initial_bbox (list): The bounding box to fetch images from [west, south, east, north]. Required.
    start_date (str): Start date for filtering images (YYYY-MM-DD). Default: None.
    end_date (str): End date for filtering images (YYYY-MM-DD). Default: None.
    fields (list): Fields to include in the response. Default: None.
    max_recursion_depth (int): Maximum depth for recursive fetching. Default: 25.
    use_coverage_tiles (bool): Whether to use coverage tiles API for large areas. Default: True.
    max_images (int): Maximum number of images to process. Default: 5000.
    max_workers (int): Maximum number of concurrent workers. Default: 10.

Returns:

    GeoImageFrame: A GeoImageFrame containing the image data.

Source code in landlensdb/handlers/cloud.py
def fetch_within_bbox(
    self,
    initial_bbox,
    start_date=None,
    end_date=None,
    fields=None,
    max_recursion_depth=25,
    use_coverage_tiles=True,
    max_images=5000,
    max_workers=10
):
    """
    Fetches images within a bounding box.

    Args:
        initial_bbox (list): The bounding box to fetch images from [west, south, east, north].
        start_date (str, optional): Start date for filtering images (YYYY-MM-DD).
        end_date (str, optional): End date for filtering images (YYYY-MM-DD).
        fields (list, optional): Fields to include in the response.
        max_recursion_depth (int, optional): Maximum depth for recursive fetching.
        use_coverage_tiles (bool, optional): Whether to use coverage tiles API for large areas.
        max_images (int, optional): Maximum number of images to process. Default is 5000.
        max_workers (int, optional): Maximum number of concurrent workers. Default is 10.

    Returns:
        GeoImageFrame: A GeoImageFrame containing the image data.
    """
    if fields is None:
        fields = self.FIELDS_LIST

    # Ensure required fields are included
    if "id" not in fields:
        fields.append("id")
    if "geometry" not in fields:
        fields.append("geometry")
    if not any(url_key in fields for url_key in self.IMAGE_URL_KEYS):
        fields.append("thumb_1024_url")

    start_timestamp = self._get_timestamp(start_date) if start_date else None
    end_timestamp = self._get_timestamp(end_date, True) if end_date else None

    if use_coverage_tiles:
        # Get coverage tiles for the area
        min_x, min_y, max_x, max_y = self._bbox_to_tile_coords(initial_bbox, self.ZOOM_LEVEL)

        all_image_ids = []
        print(f"Fetching {(max_x - min_x + 1) * (max_y - min_y + 1)} tiles...")

        # Fetch all tiles in the bounding box
        for x in range(min_x, max_x + 1):
            for y in range(min_y, max_y + 1):
                features = self._fetch_coverage_tile(self.ZOOM_LEVEL, x, y)
                image_ids = self._extract_image_ids_from_features(features)
                all_image_ids.extend(image_ids)

                # Check if we've reached the maximum number of images
                if len(all_image_ids) >= max_images * 2:  # Get more than needed to allow for filtering
                    print(f"Reached maximum number of images ({max_images}), stopping tile fetching")
                    break

            # Check again after processing a row of tiles
            if len(all_image_ids) >= max_images * 2:
                break

        print(f"Found {len(all_image_ids)} total images")

        # Remove duplicates
        all_image_ids = list(set(all_image_ids))
        print(f"After removing duplicates: {len(all_image_ids)} unique images")

        # Limit the number of images to process
        if len(all_image_ids) > max_images:
            print(f"Limiting to {max_images} images for processing")
            all_image_ids = all_image_ids[:max_images]

        # Fetch metadata for all images using multi-threading
        all_data = self._fetch_image_metadata(all_image_ids, fields, max_workers=max_workers)

        data = self._json_to_gdf(all_data)
        return GeoImageFrame(data, geometry="geometry")
    else:
        # Use traditional recursive fetching
        data = self._recursive_fetch(
            initial_bbox,
            fields,
            start_timestamp,
            end_timestamp,
            max_recursion_depth=max_recursion_depth
        )
        gdf = self._json_to_gdf(data)
        return GeoImageFrame(gdf, geometry="geometry")
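
A hedged end-to-end sketch combining the date window with the recursive path. Note that, as written above, start_timestamp and end_timestamp are only passed to _recursive_fetch, so the date filter takes effect when use_coverage_tiles=False:

mapillary = Mapillary("YOUR_TOKEN_HERE")

# Recursive bbox fetching with a capture-date window (placeholder bbox and dates).
frame = mapillary.fetch_within_bbox(
    [11.54, 48.14, 11.55, 48.15],
    start_date="2023-01-01",
    end_date="2023-12-31",
    use_coverage_tiles=False,
)
print(len(frame), "images")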

DB Handler

Postgres

A class for managing image-related postgres database operations.

Attributes:

    DATABASE_URL (str): The URL of the database to connect to.
    engine (Engine): SQLAlchemy engine for database connections.
    result_set (ResultProxy): The result of the last query executed.
    selected_table (Table): The table object for query operations.
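
A hedged usage sketch based on the methods shown below; the connection URL, table name, and column name are placeholders:

from landlensdb.handlers.db import Postgres

db = Postgres("postgresql://user:password@localhost:5432/landlens")
frame = (
    db.table("mapillary_images")
      .filter(captured_at__gte="2023-01-01")
      .all()
)
print(len(frame), "rows returned")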

Source code in landlensdb/handlers/db.py
class Postgres:
    """
    A class for managing image-related postgres database operations.

    Attributes:
        DATABASE_URL (str): The URL of the database to connect to.
        engine (Engine): SQLAlchemy engine for database connections.
        result_set (ResultProxy): The result of the last query executed.
        selected_table (Table): The table object for query operations.
    """

    def __init__(self, database_url):
        """
        Initializes the ImageDB class with the given database URL.

        Args:
            database_url (str): The URL of the database to connect to.
        """
        self.DATABASE_URL = database_url
        self.engine = create_engine(self.DATABASE_URL)
        self.result_set = None
        self.selected_table = None

    @staticmethod
    def _convert_points_to_wkt(record):
        """
        Converts Point objects to WKT (Well-Known Text) format.

        Args:
            record (dict): A dictionary containing keys and values, where values can be Point objects.

        Returns:
            dict: The record with Point objects converted to WKT strings.
        """
        for key, value in record.items():
            if isinstance(value, Point):
                record[key] = value.wkt
        return record

    @staticmethod
    def _convert_dicts_to_json(record):
        """
        Converts dictionary values in a record to JSON strings.

        Args:
            record (dict): A dictionary where values may include other dictionaries.

        Returns:
            dict: The modified record with dict values converted to JSON strings.
        """
        for key, value in record.items():
            if isinstance(value, dict):
                record[key] = json.dumps(value)
        return record

    def table(self, table_name):
        """
        Selects a table for performing queries on.

        Args:
            table_name (str): Name of the table to select.

        Returns:
            Postgres: Returns self to enable method chaining.
        """
        metadata = MetaData()
        self.selected_table = Table(table_name, metadata, autoload_with=self.engine)
        self.result_set = self.selected_table.select()
        return self

    def filter(self, **kwargs):
        """
        Applies filters to the selected table based on provided conditions.

        Args:
            **kwargs: Key-value pairs representing filters to apply.

        Returns:
            Postgres: Returns self to enable method chaining.

        Raises:
            ValueError: If an unsupported operation or a nonexistent column is specified.
        """
        filters = []

        for k, v in kwargs.items():
            if "__" in k:
                field_name, operation = k.split("__", 1)
            else:
                field_name = k
                operation = "eq"

            column = getattr(self.selected_table.columns, field_name, None)
            if column is None:
                raise ValueError(
                    f"Column '{field_name}' not found in table '{self.selected_table.name}'"
                )

            if operation == "eq":
                filters.append(column == v)
            elif operation == "gt":
                filters.append(column > v)
            elif operation == "lt":
                filters.append(column < v)
            elif operation == "gte":
                filters.append(column >= v)
            elif operation == "lte":
                filters.append(column <= v)
            else:
                raise ValueError(f"Unsupported operation '{operation}'")

        self.result_set = self.result_set.where(and_(*filters))
        return self

    def all(self):
        """
        Executes the query and returns the result as a GeoImageFrame.

        Returns:
            GeoImageFrame: The result of the query as a GeoImageFrame object.

        Raises:
            TypeError: If geometries are not of type Point.
        """
        with self.engine.connect() as conn:
            result = conn.execute(self.result_set)
            data = [row._asdict() for row in result.fetchall()]

        if not data:
            return GeoImageFrame([])  # Adjust according to your GeoImageFrame handling

        df_data = {col: [] for col in data[0].keys()}

        for d in data:
            for col, value in d.items():
                if isinstance(value, WKBElement):
                    try:
                        point_geom = loads(
                            bytes(value.data)
                        )  # convert WKBElement to Shapely geometry
                        if point_geom.geom_type != "Point":
                            raise TypeError("All geometries must be of type Point.")
                        df_data[col].append(point_geom)
                    except Exception as e:
                        print(f"Failed to process data {value.data}. Error: {e}")
                else:
                    df_data[col].append(value)

        return GeoImageFrame(df_data)

    def get_distinct_values(self, table_name, column_name):
        """
        Gets distinct values from a specific column of a table.

        Args:
            table_name (str): Name of the table to query.
            column_name (str): Name of the column to get distinct values from.

        Returns:
            list: A list of distinct values from the specified column.

        Raises:
            ValueError: If the specified column is not found in the table.
        """
        metadata = MetaData()
        metadata.reflect(bind=self.engine)

        if table_name not in metadata.tables:
            raise ValueError(f"Table '{table_name}' not found.")

        table = metadata.tables[table_name]

        if column_name not in table.columns:
            raise ValueError(
                f"Column '{column_name}' not found in table '{table_name}'"
            )

        column = table.columns[column_name]

        distinct_query = select(column).distinct()
        with self.engine.connect() as conn:
            result = conn.execute(distinct_query)

        distinct_values = [row[0] for row in result.fetchall()]
        return distinct_values

    def upsert_images(self, gif, table_name, conflict="update"):
        """
        Inserts or updates image data in the specified table.

        Args:
            gif (GeoImageFrame): The data frame containing image data.
            table_name (str): The name of the table to upsert into.
            conflict (str, optional): Conflict resolution strategy ("update" or "nothing"). Defaults to "update".

        Raises:
            ValueError: If an invalid conflict resolution type is provided.
        """
        data = gif.to_dict(orient="records")

        meta = MetaData()
        table = Table(table_name, meta, autoload_with=self.engine)

        with self.engine.begin() as conn:
            for record in data:
                record = self._convert_points_to_wkt(record)
                record = self._convert_dicts_to_json(record)
                insert_stmt = insert(table).values(**record)
                if conflict == "update":
                    updates = {
                        key: getattr(insert_stmt.excluded, key)
                        for key in record
                        if key != "image_url"
                    }
                    constraint_name = f"{table.name}_image_url_key"
                    on_conflict_stmt = insert_stmt.on_conflict_do_update(
                        constraint=constraint_name,
                        set_=updates
                    )
                elif conflict == "nothing":
                    on_conflict_stmt = insert_stmt.on_conflict_do_nothing()
                else:
                    raise ValueError(
                        "Invalid conflict resolution type. Choose 'update' or 'nothing'."
                    )

                conn.execute(on_conflict_stmt)

__init__(database_url)

Initializes the Postgres class with the given database URL.

Parameters:

Name Type Description Default
database_url str

The URL of the database to connect to.

required
Source code in landlensdb/handlers/db.py
def __init__(self, database_url):
    """
    Initializes the Postgres class with the given database URL.

    Args:
        database_url (str): The URL of the database to connect to.
    """
    self.DATABASE_URL = database_url
    self.engine = create_engine(self.DATABASE_URL)
    self.result_set = None
    self.selected_table = None

_convert_dicts_to_json(record) staticmethod

Converts dictionary values in a record to JSON strings.

Parameters:

Name Type Description Default
record dict

A dictionary where values may include other dictionaries.

required

Returns:

Name Type Description
dict

The modified record with dict values converted to JSON strings.

Source code in landlensdb/handlers/db.py
@staticmethod
def _convert_dicts_to_json(record):
    """
    Converts dictionary values in a record to JSON strings.

    Args:
        record (dict): A dictionary where values may include other dictionaries.

    Returns:
        dict: The modified record with dict values converted to JSON strings.
    """
    for key, value in record.items():
        if isinstance(value, dict):
            record[key] = json.dumps(value)
    return record

_convert_points_to_wkt(record) staticmethod

Converts Point objects to WKT (Well-Known Text) format.

Parameters:

Name Type Description Default
record dict

A dictionary containing keys and values, where values can be Point objects.

required

Returns:

Name Type Description
dict

The record with Point objects converted to WKT strings.

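For illustration (hypothetical record), the conversion simply substitutes each Point with its WKT string:

>>> from shapely.geometry import Point
>>> record = {"name": "img_001.jpg", "geometry": Point(12, 41)}
>>> Postgres._convert_points_to_wkt(record)
{'name': 'img_001.jpg', 'geometry': 'POINT (12 41)'}
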
Source code in landlensdb/handlers/db.py
@staticmethod
def _convert_points_to_wkt(record):
    """
    Converts Point objects to WKT (Well-Known Text) format.

    Args:
        record (dict): A dictionary containing keys and values, where values can be Point objects.

    Returns:
        dict: The record with Point objects converted to WKT strings.
    """
    for key, value in record.items():
        if isinstance(value, Point):
            record[key] = value.wkt
    return record

all()

Executes the query and returns the result as a GeoImageFrame.

Returns:

Name Type Description
GeoImageFrame

The result of the query as a GeoImageFrame object.

Raises:

Type Description
TypeError

If geometries are not of type Point.

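When the query matches no rows, the source below returns an empty GeoImageFrame rather than raising. A typical call sits at the end of a table()/filter() chain (connection URL and table name are placeholders):

>>> db = Postgres("postgresql://user:password@localhost:5432/landlens")
>>> frame = db.table("images").filter(camera_type="fisheye").all()
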
Source code in landlensdb/handlers/db.py
def all(self):
    """
    Executes the query and returns the result as a GeoImageFrame.

    Returns:
        GeoImageFrame: The result of the query as a GeoImageFrame object.

    Raises:
        TypeError: If geometries are not of type Point.
    """
    with self.engine.connect() as conn:
        result = conn.execute(self.result_set)
        data = [row._asdict() for row in result.fetchall()]

    if not data:
        return GeoImageFrame([])  # Adjust according to your GeoImageFrame handling

    df_data = {col: [] for col in data[0].keys()}

    for d in data:
        for col, value in d.items():
            if isinstance(value, WKBElement):
                try:
                    point_geom = loads(
                        bytes(value.data)
                    )  # convert WKBElement to Shapely geometry
                    if point_geom.geom_type != "Point":
                        raise TypeError("All geometries must be of type Point.")
                    df_data[col].append(point_geom)
                except Exception as e:
                    print(f"Failed to process data {value.data}. Error: {e}")
            else:
                df_data[col].append(value)

    return GeoImageFrame(df_data)

filter(**kwargs)

Applies filters to the selected table based on provided conditions.

Parameters:

Name Type Description Default
**kwargs

Key-value pairs representing filters to apply.

{}

Returns:

Name Type Description
Postgres

Returns self to enable method chaining.

Raises:

Type Description
ValueError

If an unsupported operation or a nonexistent column is specified.

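The double-underscore suffixes map to comparison operators: eq (the default), gt, lt, gte, and lte. A sketch with a hypothetical "images" table:

>>> db = Postgres("postgresql://user:password@localhost:5432/landlens")
>>> query = db.table("images").filter(altitude__gte=100, camera_type="perspective")
>>> frame = query.all()
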
Source code in landlensdb/handlers/db.py
def filter(self, **kwargs):
    """
    Applies filters to the selected table based on provided conditions.

    Args:
        **kwargs: Key-value pairs representing filters to apply.

    Returns:
        Postgres: Returns self to enable method chaining.

    Raises:
        ValueError: If an unsupported operation or a nonexistent column is specified.
    """
    filters = []

    for k, v in kwargs.items():
        if "__" in k:
            field_name, operation = k.split("__", 1)
        else:
            field_name = k
            operation = "eq"

        column = getattr(self.selected_table.columns, field_name, None)
        if column is None:
            raise ValueError(
                f"Column '{field_name}' not found in table '{self.selected_table.name}'"
            )

        if operation == "eq":
            filters.append(column == v)
        elif operation == "gt":
            filters.append(column > v)
        elif operation == "lt":
            filters.append(column < v)
        elif operation == "gte":
            filters.append(column >= v)
        elif operation == "lte":
            filters.append(column <= v)
        else:
            raise ValueError(f"Unsupported operation '{operation}'")

    self.result_set = self.result_set.where(and_(*filters))
    return self

get_distinct_values(table_name, column_name)

Gets distinct values from a specific column of a table.

Parameters:

Name Type Description Default
table_name str

Name of the table to query.

required
column_name str

Name of the column to get distinct values from.

required

Returns:

Name Type Description
list

A list of distinct values from the specified column.

Raises:

Type Description
ValueError

If the specified column is not found in the table.

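A sketch with placeholder table and column names; the return value shown is purely illustrative:

>>> db = Postgres("postgresql://user:password@localhost:5432/landlens")
>>> db.get_distinct_values("images", "camera_type")
['perspective', 'fisheye', '360-degree']
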
Source code in landlensdb/handlers/db.py
def get_distinct_values(self, table_name, column_name):
    """
    Gets distinct values from a specific column of a table.

    Args:
        table_name (str): Name of the table to query.
        column_name (str): Name of the column to get distinct values from.

    Returns:
        list: A list of distinct values from the specified column.

    Raises:
        ValueError: If the specified column is not found in the table.
    """
    metadata = MetaData()
    metadata.reflect(bind=self.engine)

    if table_name not in metadata.tables:
        raise ValueError(f"Table '{table_name}' not found.")

    table = metadata.tables[table_name]

    if column_name not in table.columns:
        raise ValueError(
            f"Column '{column_name}' not found in table '{table_name}'"
        )

    column = table.columns[column_name]

    distinct_query = select(column).distinct()
    with self.engine.connect() as conn:
        result = conn.execute(distinct_query)

    distinct_values = [row[0] for row in result.fetchall()]
    return distinct_values

table(table_name)

Selects a table for performing queries on.

Parameters:

Name Type Description Default
table_name str

Name of the table to select.

required

Returns:

Name Type Description
Postgres

Returns self to enable method chaining.

Source code in landlensdb/handlers/db.py
def table(self, table_name):
    """
    Selects a table for performing queries on.

    Args:
        table_name (str): Name of the table to select.

    Returns:
        Postgres: Returns self to enable method chaining.
    """
    metadata = MetaData()
    self.selected_table = Table(table_name, metadata, autoload_with=self.engine)
    self.result_set = self.selected_table.select()
    return self

upsert_images(gif, table_name, conflict='update')

Inserts or updates image data in the specified table.

Parameters:

Name Type Description Default
gif GeoImageFrame

The data frame containing image data.

required
table_name str

The name of the table to upsert into.

required
conflict str

Conflict resolution strategy ("update" or "nothing"). Defaults to "update".

'update'

Raises:

Type Description
ValueError

If an invalid conflict resolution type is provided.

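Note that the "update" strategy resolves conflicts against a unique constraint named <table>_image_url_key (see the source below), so the target table is assumed to already have a unique image_url column. A sketch with placeholder names:

>>> db = Postgres("postgresql://user:password@localhost:5432/landlens")
>>> frame = Local.load_images("/path/to/images")
>>> db.upsert_images(frame, "images", conflict="update")
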
Source code in landlensdb/handlers/db.py
def upsert_images(self, gif, table_name, conflict="update"):
    """
    Inserts or updates image data in the specified table.

    Args:
        gif (GeoImageFrame): The data frame containing image data.
        table_name (str): The name of the table to upsert into.
        conflict (str, optional): Conflict resolution strategy ("update" or "nothing"). Defaults to "update".

    Raises:
        ValueError: If an invalid conflict resolution type is provided.
    """
    data = gif.to_dict(orient="records")

    meta = MetaData()
    table = Table(table_name, meta, autoload_with=self.engine)

    with self.engine.begin() as conn:
        for record in data:
            record = self._convert_points_to_wkt(record)
            record = self._convert_dicts_to_json(record)
            insert_stmt = insert(table).values(**record)
            if conflict == "update":
                updates = {
                    key: getattr(insert_stmt.excluded, key)
                    for key in record
                    if key != "image_url"
                }
                constraint_name = f"{table.name}_image_url_key"
                on_conflict_stmt = insert_stmt.on_conflict_do_update(
                    constraint=constraint_name,
                    set_=updates
                )
            elif conflict == "nothing":
                on_conflict_stmt = insert_stmt.on_conflict_do_nothing()
            else:
                raise ValueError(
                    "Invalid conflict resolution type. Choose 'update' or 'nothing'."
                )

            conn.execute(on_conflict_stmt)

Image Handler

Local

A class to process EXIF data from images, mainly focusing on extracting geotagging information.

This class includes methods to extract various camera and image properties, such as focal length, camera type, coordinates, and other related data.

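A minimal sketch (the directory path is a placeholder, and the returned GeoImageFrame is assumed to expose the usual GeoDataFrame interface):

>>> frame = Local.load_images("/data/field_photos", create_thumbnails=False)
>>> list(frame.columns)  # name, altitude, camera_type, captured_at, geometry, ...
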
Source code in landlensdb/handlers/image.py
class Local:
    """
    A class to process EXIF data from images, mainly focusing on extracting geotagging information.

    This class includes methods to extract various camera and image properties, such as focal length,
    camera type, coordinates, and other related data.
    """

    @staticmethod
    def _get_camera_model(exif_data):
        """
        Extracts the camera model from the EXIF data.

        Args:
            exif_data (dict): The EXIF data.

        Returns:
            str: Camera model if available, otherwise an empty string.
        """
        return exif_data.get("Model", "").strip()

    @staticmethod
    def _infer_camera_type(focal_length, camera_model=None):
        """
        Infers the camera type based on the focal length and camera model.

        Args:
            focal_length (float): The focal length of the camera.
            camera_model (str): The camera model.

        Returns:
            str: One of "fisheye", "perspective", or "360-degree".
        """
        if not focal_length and not camera_model:
            return np.nan

        known_360_cameras = KNOWN_CAMERAS.get("360 Models", [])

        if camera_model in known_360_cameras:
            return "360-degree"

        # Without a usable focal length, the type cannot be classified further
        if not focal_length:
            return np.nan

        # Further classification based on focal length
        if focal_length < 1.5:
            return "fisheye"
        else:
            return "perspective"

    @staticmethod
    def get_exif_data(img):
        """
        Retrieves the EXIF data from an image.

        Args:
            img (PIL.Image.Image): The image to extract EXIF data from.

        Returns:
            dict: A dictionary containing the EXIF data.
        """
        exif_data = {}
        info = img._getexif()
        if info:
            for tag, value in info.items():
                tag_name = TAGS.get(tag, tag)
                if tag_name == "GPSInfo":
                    gps_info = {}
                    for t in value:
                        sub_tag_name = GPSTAGS.get(t, t)
                        gps_info[sub_tag_name] = value[t]
                    exif_data[tag_name] = gps_info
                else:
                    exif_data[tag_name] = value
        return exif_data

    @staticmethod
    def create_thumbnail(image_path, size=(256, 256)):
        """
        Creates a thumbnail for the given image while preserving aspect ratio.

        Args:
            image_path (str): Path to the original image
            size (tuple): Desired thumbnail size as (width, height). Default is (256, 256)

        Returns:
            str: Path to the created thumbnail

        Raises:
            FileNotFoundError: If the image file doesn't exist
            ValueError: If the image cannot be opened or processed
        """
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path}")

        # Create thumbnails directory in the same directory as the original image
        original_dir = os.path.dirname(image_path)
        thumbnail_dir = os.path.join(original_dir, "thumbnails")
        os.makedirs(thumbnail_dir, exist_ok=True)

        # Generate thumbnail filename
        original_filename = os.path.basename(image_path)
        thumbnail_filename = f"thumb_{original_filename}"
        thumbnail_path = os.path.join(thumbnail_dir, thumbnail_filename)

        try:
            with Image.open(image_path) as img:
                # Convert to RGB if necessary
                if img.mode in ('RGBA', 'LA'):
                    img = img.convert('RGB')

                # Calculate new dimensions preserving aspect ratio
                img.thumbnail(size, Image.Resampling.LANCZOS)

                # Save thumbnail
                img.save(thumbnail_path, "JPEG", quality=85)
                return thumbnail_path

        except Exception as e:
            raise ValueError(f"Error creating thumbnail for {image_path}: {str(e)}")

    @staticmethod
    def _to_decimal(coord_tuple):
        """
        Converts coordinates from degrees, minutes, and seconds to decimal.

        Args:
            coord_tuple (tuple or str): The coordinate tuple to convert.

        Returns:
            float: Decimal representation of the coordinates.
        """
        if isinstance(coord_tuple, tuple) and len(coord_tuple) == 3:
            return (
                float(coord_tuple[0])
                + float(coord_tuple[1]) / 60
                + float(coord_tuple[2]) / 3600
            )
        elif isinstance(coord_tuple, str) and "/" in coord_tuple:
            num, denom = coord_tuple.split("/")
            if float(denom) != 0:
                return float(num) / float(denom)
            else:
                return None
        return coord_tuple

    @classmethod
    def _get_geotagging(cls, exif):
        """
        Extracts geotagging information from EXIF metadata.

        Args:
            exif (dict): The EXIF metadata.

        Returns:
            dict: A dictionary containing the geotagging information.

        Raises:
            ValueError: If no EXIF metadata found or no GPSInfo tag found.
        """
        if not exif:
            raise ValueError("No EXIF metadata found")

        idx = None
        for tag, label in TAGS.items():
            if label == "GPSInfo":
                idx = tag
                break

        if idx is None:
            raise ValueError("No GPSInfo tag found in TAGS.")

        gps_data = exif.get("GPSInfo", exif.get(idx, None))
        if not gps_data:
            raise ValueError("No EXIF geotagging found")

        geotagging = {}
        for key, val in GPSTAGS.items():
            data_value = gps_data.get(key) or gps_data.get(val)
            if data_value:
                geotagging[val] = data_value

        return geotagging

    @classmethod
    def _get_image_altitude(cls, geotags):
        """
        Retrieves the altitude information from geotags.

        Args:
            geotags (dict): The geotags information.

        Returns:
            float: Altitude information if available, otherwise None.
        """
        if "GPSAltitude" in geotags:
            return geotags["GPSAltitude"]
        return None

    @classmethod
    def _get_image_direction(cls, geotags):
        """
        Retrieves the image direction information from geotags.

        Args:
            geotags (dict): The geotags information.

        Returns:
            float: Image direction information if available, otherwise None.
        """
        if "GPSImgDirection" in geotags:
            return geotags["GPSImgDirection"]
        return None

    @classmethod
    def _get_coordinates(cls, geotags):
        """
        Retrieves the latitude and longitude coordinates from geotags.

        Args:
            geotags (dict): The geotags information.

        Returns:
            tuple: Latitude and longitude coordinates.

        Raises:
            ValueError: If the coordinates are invalid.
        """
        lat = cls._to_decimal(geotags["GPSLatitude"])
        lon = cls._to_decimal(geotags["GPSLongitude"])

        if geotags["GPSLatitudeRef"] == "S":
            lat = -lat

        if geotags["GPSLongitudeRef"] == "W":
            lon = -lon

        return lat, lon

    @staticmethod
    def _get_focal_length(exif_data):
        """
        Retrieves the focal length from the EXIF data.

        Args:
            exif_data (dict): The EXIF data.

        Returns:
            float: Focal length if available, otherwise None.
        """
        focal_length = exif_data.get("FocalLength", None)

        if focal_length is None:
            return None

        if isinstance(focal_length, numbers.Number):
            return float(focal_length)

        elif (
            isinstance(focal_length, tuple)
            and len(focal_length) == 2
            and focal_length[1] != 0
        ):
            return float(focal_length[0]) / focal_length[1]

        elif (
            hasattr(focal_length, "num")
            and hasattr(focal_length, "den")
            and focal_length.den != 0
        ):
            return float(focal_length.num) / focal_length.den

        else:
            return None

    @classmethod
    def load_images(cls, directory, additional_columns=None, create_thumbnails=True, thumbnail_size=(256, 256)):
        """
        Loads images from a given directory, extracts relevant information, and returns it in a GeoImageFrame.

        Args:
            directory (str): Path to the directory containing images.
            additional_columns (list, optional): List of additional column names or tuples containing column name and EXIF tag.
            create_thumbnails (bool): Whether to create thumbnails for the images. Defaults to True.
            thumbnail_size (tuple): Size for generated thumbnails as (width, height). Defaults to (256, 256).

        Returns:
            GeoImageFrame: Frame containing the data extracted from the images.

        Raises:
            ValueError: If no valid images are found in the directory.

        Examples:
            >>> directory = "/path/to/images"
            >>> image_data = Local.load_images(directory, create_thumbnails=True)
        """
        tf = TimezoneFinder()
        data = []
        valid_image_count = 0
        for root, dirs, files in os.walk(directory):
            # Skip thumbnails directory
            if "thumbnails" in dirs:
                dirs.remove("thumbnails")
            for file in files:
                if file.lower().endswith((".png", ".jpg", ".jpeg")):
                    valid_image_count += 1
                    filepath = os.path.join(root, file)
                    img = Image.open(filepath)
                    exif_data = cls.get_exif_data(img)
                    try:
                        geotags = cls._get_geotagging(exif_data)
                        lat, lon = cls._get_coordinates(geotags)
                        if lat is None or lon is None:
                            raise ValueError(
                                f"Invalid coordinates for {filepath}: Latitude: {lat}, Longitude: {lon}"
                            )
                        geometry = Point(lon, lat)
                    except Exception as e:
                        warnings.warn(
                            f"Error extracting geotags for {filepath}: {str(e)}. Skipped."
                        )
                        continue
                    focal_length = cls._get_focal_length(exif_data)
                    camera_model = cls._get_camera_model(exif_data)
                    camera_type = cls._infer_camera_type(focal_length, camera_model)

                    k1 = None
                    k2 = None
                    if None in [focal_length, k1, k2]:
                        camera_parameters = np.nan
                    else:
                        camera_parameters = ",".join(
                            [str(focal_length), str(k1), str(k2)]
                        )

                    captured_at_str = exif_data.get("DateTime", None)
                    if captured_at_str and geometry:
                        captured_at_naive = datetime.strptime(
                            captured_at_str, "%Y:%m:%d %H:%M:%S"
                        )
                        tz_name = tf.timezone_at(lat=lat, lng=lon)
                        if tz_name:
                            local_tz = pytz.timezone(tz_name)
                            captured_at = local_tz.localize(
                                captured_at_naive
                            ).isoformat()
                        else:
                            captured_at = captured_at_naive.isoformat()
                    else:
                        captured_at = None

                    altitude = np.float32(cls._get_image_altitude(geotags))
                    compass_angle = np.float32(cls._get_image_direction(geotags))
                    exif_orientation = np.float32(exif_data.get("Orientation", None))

                    # Generate thumbnail if requested
                    thumb_url = None
                    if create_thumbnails:
                        try:
                            # Check if thumbnail already exists
                            thumbnail_dir = os.path.join(os.path.dirname(filepath), "thumbnails")
                            thumb_filename = f"thumb_{os.path.basename(filepath)}"
                            thumb_path = os.path.join(thumbnail_dir, thumb_filename)

                            if os.path.exists(thumb_path):
                                thumb_url = thumb_path
                            else:
                                thumb_url = cls.create_thumbnail(filepath, size=thumbnail_size)
                        except Exception as e:
                            warnings.warn(f"Error creating thumbnail for {filepath}: {str(e)}")

                    image_data = {
                        "name": filepath.split("/")[-1],
                        "altitude": altitude,
                        "camera_type": camera_type,
                        "camera_parameters": camera_parameters,
                        "captured_at": captured_at,
                        "compass_angle": compass_angle,
                        "exif_orientation": exif_orientation,
                        "image_url": filepath,
                        "thumb_url": thumb_url,
                        "geometry": geometry,
                    }

                    for column_info in additional_columns or []:
                        if isinstance(column_info, str):
                            image_data[column_info] = np.nan
                        elif isinstance(column_info, tuple):
                            col_name, exif_tag = column_info
                            image_data[col_name] = exif_data.get(exif_tag, np.nan)

                    data.append(image_data)

        if valid_image_count == 0:
            raise ValueError("The directory does not contain any valid images")

        gif = GeoImageFrame(data, geometry="geometry")
        gif.set_crs(epsg=4326, inplace=True)
        return gif

_get_camera_model(exif_data) staticmethod

Extracts the camera model from the EXIF data.

Parameters:

Name Type Description Default
exif_data dict

The EXIF data.

required

Returns:

Name Type Description
str

Camera model if available, otherwise an empty string.

Source code in landlensdb/handlers/image.py
@staticmethod
def _get_camera_model(exif_data):
    """
    Extracts the camera model from the EXIF data.

    Args:
        exif_data (dict): The EXIF data.

    Returns:
        str: Camera model if available, otherwise an empty string.
    """
    return exif_data.get("Model", "").strip()

_get_coordinates(geotags) classmethod

Retrieves the latitude and longitude coordinates from geotags.

Parameters:

Name Type Description Default
geotags dict

The geotags information.

required

Returns:

Name Type Description
tuple

Latitude and longitude coordinates.

Raises:

Type Description
ValueError

If the coordinates are invalid.

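For illustration (hypothetical values), a southern latitude reference flips the sign of the decimal result:

>>> geotags = {
...     "GPSLatitude": (33.0, 30.0, 0.0), "GPSLatitudeRef": "S",
...     "GPSLongitude": (151.0, 15.0, 0.0), "GPSLongitudeRef": "E",
... }
>>> Local._get_coordinates(geotags)
(-33.5, 151.25)
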
Source code in landlensdb/handlers/image.py
@classmethod
def _get_coordinates(cls, geotags):
    """
    Retrieves the latitude and longitude coordinates from geotags.

    Args:
        geotags (dict): The geotags information.

    Returns:
        tuple: Latitude and longitude coordinates.

    Raises:
        ValueError: If the coordinates are invalid.
    """
    lat = cls._to_decimal(geotags["GPSLatitude"])
    lon = cls._to_decimal(geotags["GPSLongitude"])

    if geotags["GPSLatitudeRef"] == "S":
        lat = -lat

    if geotags["GPSLongitudeRef"] == "W":
        lon = -lon

    return lat, lon

_get_focal_length(exif_data) staticmethod

Retrieves the focal length from the EXIF data.

Parameters:

Name Type Description Default
exif_data dict

The EXIF data.

required

Returns:

Name Type Description
float

Focal length if available, otherwise None.

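The helper accepts plain numbers, (numerator, denominator) tuples, and rational objects exposing num/den attributes; for example:

>>> Local._get_focal_length({"FocalLength": 24})
24.0
>>> Local._get_focal_length({"FocalLength": (35, 10)})
3.5
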
Source code in landlensdb/handlers/image.py
@staticmethod
def _get_focal_length(exif_data):
    """
    Retrieves the focal length from the EXIF data.

    Args:
        exif_data (dict): The EXIF data.

    Returns:
        float: Focal length if available, otherwise None.
    """
    focal_length = exif_data.get("FocalLength", None)

    if focal_length is None:
        return None

    if isinstance(focal_length, numbers.Number):
        return float(focal_length)

    elif (
        isinstance(focal_length, tuple)
        and len(focal_length) == 2
        and focal_length[1] != 0
    ):
        return float(focal_length[0]) / focal_length[1]

    elif (
        hasattr(focal_length, "num")
        and hasattr(focal_length, "den")
        and focal_length.den != 0
    ):
        return float(focal_length.num) / focal_length.den

    else:
        return None

_get_geotagging(exif) classmethod

Extracts geotagging information from EXIF metadata.

Parameters:

Name Type Description Default
exif dict

The EXIF metadata.

required

Returns:

Name Type Description
dict

A dictionary containing the geotagging information.

Raises:

Type Description
ValueError

If no EXIF metadata found or no GPSInfo tag found.

Source code in landlensdb/handlers/image.py
@classmethod
def _get_geotagging(cls, exif):
    """
    Extracts geotagging information from EXIF metadata.

    Args:
        exif (dict): The EXIF metadata.

    Returns:
        dict: A dictionary containing the geotagging information.

    Raises:
        ValueError: If no EXIF metadata found or no GPSInfo tag found.
    """
    if not exif:
        raise ValueError("No EXIF metadata found")

    idx = None
    for tag, label in TAGS.items():
        if label == "GPSInfo":
            idx = tag
            break

    if idx is None:
        raise ValueError("No GPSInfo tag found in TAGS.")

    gps_data = exif.get("GPSInfo", exif.get(idx, None))
    if not gps_data:
        raise ValueError("No EXIF geotagging found")

    geotagging = {}
    for key, val in GPSTAGS.items():
        data_value = gps_data.get(key) or gps_data.get(val)
        if data_value:
            geotagging[val] = data_value

    return geotagging

_get_image_altitude(geotags) classmethod

Retrieves the altitude information from geotags.

Parameters:

Name Type Description Default
geotags dict

The geotags information.

required

Returns:

Name Type Description
float

Altitude information if available, otherwise None.

Source code in landlensdb/handlers/image.py
@classmethod
def _get_image_altitude(cls, geotags):
    """
    Retrieves the altitude information from geotags.

    Args:
        geotags (dict): The geotags information.

    Returns:
        float: Altitude information if available, otherwise None.
    """
    if "GPSAltitude" in geotags:
        return geotags["GPSAltitude"]
    return None

_get_image_direction(geotags) classmethod

Retrieves the image direction information from geotags.

Parameters:

Name Type Description Default
geotags dict

The geotags information.

required

Returns:

Name Type Description
float

Image direction information if available, otherwise None.

Source code in landlensdb/handlers/image.py
@classmethod
def _get_image_direction(cls, geotags):
    """
    Retrieves the image direction information from geotags.

    Args:
        geotags (dict): The geotags information.

    Returns:
        float: Image direction information if available, otherwise None.
    """
    if "GPSImgDirection" in geotags:
        return geotags["GPSImgDirection"]
    return None

_infer_camera_type(focal_length, camera_model=None) staticmethod

Infers the camera type based on the focal length and camera model.

Parameters:

Name Type Description Default
focal_length float

The focal length of the camera.

required
camera_model str

The camera model.

None

Returns:

Name Type Description
str

One of "fisheye", "perspective", or "360-degree".

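As the source below shows, models not listed as 360-degree cameras are split on a focal-length threshold of 1.5:

>>> Local._infer_camera_type(0.9)
'fisheye'
>>> Local._infer_camera_type(4.2)
'perspective'
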
Source code in landlensdb/handlers/image.py
@staticmethod
def _infer_camera_type(focal_length, camera_model=None):
    """
    Infers the camera type based on the focal length and camera model.

    Args:
        focal_length (float): The focal length of the camera.
        camera_model (str): The camera model.

    Returns:
        str: One of "fisheye", "perspective", or "360-degree".
    """
    if not focal_length and not camera_model:
        return np.nan

    known_360_cameras = KNOWN_CAMERAS.get("360 Models", [])

    if camera_model in known_360_cameras:
        return "360-degree"

    # Without a usable focal length, the type cannot be classified further
    if not focal_length:
        return np.nan

    # Further classification based on focal length
    if focal_length < 1.5:
        return "fisheye"
    else:
        return "perspective"

_to_decimal(coord_tuple) staticmethod

Converts coordinates from degrees, minutes, and seconds to decimal.

Parameters:

Name Type Description Default
coord_tuple tuple or str

The coordinate tuple to convert.

required

Returns:

Name Type Description
float

Decimal representation of the coordinates.

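A degrees/minutes/seconds tuple is converted as degrees + minutes/60 + seconds/3600, and a "num/denom" string is evaluated as a ratio (illustrative values):

>>> Local._to_decimal((12.0, 15.0, 0.0))
12.25
>>> Local._to_decimal("57/2")
28.5
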
Source code in landlensdb/handlers/image.py
@staticmethod
def _to_decimal(coord_tuple):
    """
    Converts coordinates from degrees, minutes, and seconds to decimal.

    Args:
        coord_tuple (tuple or str): The coordinate tuple to convert.

    Returns:
        float: Decimal representation of the coordinates.
    """
    if isinstance(coord_tuple, tuple) and len(coord_tuple) == 3:
        return (
            float(coord_tuple[0])
            + float(coord_tuple[1]) / 60
            + float(coord_tuple[2]) / 3600
        )
    elif isinstance(coord_tuple, str) and "/" in coord_tuple:
        num, denom = coord_tuple.split("/")
        if float(denom) != 0:
            return float(num) / float(denom)
        else:
            return None
    return coord_tuple

create_thumbnail(image_path, size=(256, 256)) staticmethod

Creates a thumbnail for the given image while preserving aspect ratio.

Parameters:

Name Type Description Default
image_path str

Path to the original image

required
size tuple

Desired thumbnail size as (width, height). Default is (256, 256)

(256, 256)

Returns:

Name Type Description
str

Path to the created thumbnail

Raises:

Type Description
FileNotFoundError

If the image file doesn't exist

ValueError

If the image cannot be opened or processed

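Thumbnails are written to a thumbnails/ subdirectory next to the original image and prefixed with thumb_; the path below is a placeholder:

>>> Local.create_thumbnail("/data/field_photos/img_001.jpg", size=(128, 128))
'/data/field_photos/thumbnails/thumb_img_001.jpg'
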
Source code in landlensdb/handlers/image.py
@staticmethod
def create_thumbnail(image_path, size=(256, 256)):
    """
    Creates a thumbnail for the given image while preserving aspect ratio.

    Args:
        image_path (str): Path to the original image
        size (tuple): Desired thumbnail size as (width, height). Default is (256, 256)

    Returns:
        str: Path to the created thumbnail

    Raises:
        FileNotFoundError: If the image file doesn't exist
        ValueError: If the image cannot be opened or processed
    """
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    # Create thumbnails directory in the same directory as the original image
    original_dir = os.path.dirname(image_path)
    thumbnail_dir = os.path.join(original_dir, "thumbnails")
    os.makedirs(thumbnail_dir, exist_ok=True)

    # Generate thumbnail filename
    original_filename = os.path.basename(image_path)
    thumbnail_filename = f"thumb_{original_filename}"
    thumbnail_path = os.path.join(thumbnail_dir, thumbnail_filename)

    try:
        with Image.open(image_path) as img:
            # Convert to RGB if necessary
            if img.mode in ('RGBA', 'LA'):
                img = img.convert('RGB')

            # Calculate new dimensions preserving aspect ratio
            img.thumbnail(size, Image.Resampling.LANCZOS)

            # Save thumbnail
            img.save(thumbnail_path, "JPEG", quality=85)
            return thumbnail_path

    except Exception as e:
        raise ValueError(f"Error creating thumbnail for {image_path}: {str(e)}")

get_exif_data(img) staticmethod

Retrieves the EXIF data from an image.

Parameters:

Name Type Description Default
img Image

The image to extract EXIF data from.

required

Returns:

Name Type Description
dict

A dictionary containing the EXIF data.

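A sketch with a placeholder path; GPS tags are grouped under the GPSInfo key of the returned dictionary:

>>> from PIL import Image
>>> img = Image.open("/data/field_photos/img_001.jpg")
>>> exif = Local.get_exif_data(img)
>>> model = exif.get("Model")
>>> gps = exif.get("GPSInfo", {})
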
Source code in landlensdb/handlers/image.py
@staticmethod
def get_exif_data(img):
    """
    Retrieves the EXIF data from an image.

    Args:
        img (PIL.Image.Image): The image to extract EXIF data from.

    Returns:
        dict: A dictionary containing the EXIF data.
    """
    exif_data = {}
    info = img._getexif()
    if info:
        for tag, value in info.items():
            tag_name = TAGS.get(tag, tag)
            if tag_name == "GPSInfo":
                gps_info = {}
                for t in value:
                    sub_tag_name = GPSTAGS.get(t, t)
                    gps_info[sub_tag_name] = value[t]
                exif_data[tag_name] = gps_info
            else:
                exif_data[tag_name] = value
    return exif_data

load_images(directory, additional_columns=None, create_thumbnails=True, thumbnail_size=(256, 256)) classmethod

Loads images from a given directory, extracts relevant information, and returns it in a GeoImageFrame.

Parameters:

Name Type Description Default
directory str

Path to the directory containing images.

required
additional_columns list

List of additional column names or tuples containing column name and EXIF tag.

None
create_thumbnails bool

Whether to create thumbnails for the images. Defaults to True.

True
thumbnail_size tuple

Size for generated thumbnails as (width, height). Defaults to (256, 256).

(256, 256)

Returns:

Name Type Description
GeoImageFrame

Frame containing the data extracted from the images.

Raises:

Type Description
ValueError

If no valid images are found in the directory.

Examples:

>>> directory = "/path/to/images"
>>> image_data = Local.load_images(directory, create_thumbnails=True)
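
Additional columns can be filled straight from EXIF by passing (column_name, exif_tag) tuples, while bare names are added as empty (NaN) columns. A sketch assuming the standard ISOSpeedRatings EXIF tag, with a placeholder path:

>>> frame = Local.load_images(
...     "/path/to/images",
...     additional_columns=[("iso", "ISOSpeedRatings"), "notes"],
...     thumbnail_size=(128, 128),
... )
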
Source code in landlensdb/handlers/image.py
@classmethod
def load_images(cls, directory, additional_columns=None, create_thumbnails=True, thumbnail_size=(256, 256)):
    """
    Loads images from a given directory, extracts relevant information, and returns it in a GeoImageFrame.

    Args:
        directory (str): Path to the directory containing images.
        additional_columns (list, optional): List of additional column names or tuples containing column name and EXIF tag.
        create_thumbnails (bool): Whether to create thumbnails for the images. Defaults to True.
        thumbnail_size (tuple): Size for generated thumbnails as (width, height). Defaults to (256, 256).

    Returns:
        GeoImageFrame: Frame containing the data extracted from the images.

    Raises:
        ValueError: If no valid images are found in the directory.

    Examples:
        >>> directory = "/path/to/images"
        >>> image_data = Local.load_images(directory, create_thumbnails=True)
    """
    tf = TimezoneFinder()
    data = []
    valid_image_count = 0
    for root, dirs, files in os.walk(directory):
        # Skip thumbnails directory
        if "thumbnails" in dirs:
            dirs.remove("thumbnails")
        for file in files:
            if file.lower().endswith((".png", ".jpg", ".jpeg")):
                valid_image_count += 1
                filepath = os.path.join(root, file)
                img = Image.open(filepath)
                exif_data = cls.get_exif_data(img)
                try:
                    geotags = cls._get_geotagging(exif_data)
                    lat, lon = cls._get_coordinates(geotags)
                    if lat is None or lon is None:
                        raise ValueError(
                            f"Invalid coordinates for {filepath}: Latitude: {lat}, Longitude: {lon}"
                        )
                    geometry = Point(lon, lat)
                except Exception as e:
                    warnings.warn(
                        f"Error extracting geotags for {filepath}: {str(e)}. Skipped."
                    )
                    continue
                focal_length = cls._get_focal_length(exif_data)
                camera_model = cls._get_camera_model(exif_data)
                camera_type = cls._infer_camera_type(focal_length, camera_model)

                k1 = None
                k2 = None
                if None in [focal_length, k1, k2]:
                    camera_parameters = np.nan
                else:
                    camera_parameters = ",".join(
                        [str(focal_length), str(k1), str(k2)]
                    )

                captured_at_str = exif_data.get("DateTime", None)
                if captured_at_str and geometry:
                    captured_at_naive = datetime.strptime(
                        captured_at_str, "%Y:%m:%d %H:%M:%S"
                    )
                    tz_name = tf.timezone_at(lat=lat, lng=lon)
                    if tz_name:
                        local_tz = pytz.timezone(tz_name)
                        captured_at = local_tz.localize(
                            captured_at_naive
                        ).isoformat()
                    else:
                        captured_at = captured_at_naive.isoformat()
                else:
                    captured_at = None

                altitude = np.float32(cls._get_image_altitude(geotags))
                compass_angle = np.float32(cls._get_image_direction(geotags))
                exif_orientation = np.float32(exif_data.get("Orientation", None))

                # Generate thumbnail if requested
                thumb_url = None
                if create_thumbnails:
                    try:
                        # Check if thumbnail already exists
                        thumbnail_dir = os.path.join(os.path.dirname(filepath), "thumbnails")
                        thumb_filename = f"thumb_{os.path.basename(filepath)}"
                        thumb_path = os.path.join(thumbnail_dir, thumb_filename)

                        if os.path.exists(thumb_path):
                            thumb_url = thumb_path
                        else:
                            thumb_url = cls.create_thumbnail(filepath, size=thumbnail_size)
                    except Exception as e:
                        warnings.warn(f"Error creating thumbnail for {filepath}: {str(e)}")

                image_data = {
                    "name": filepath.split("/")[-1],
                    "altitude": altitude,
                    "camera_type": camera_type,
                    "camera_parameters": camera_parameters,
                    "captured_at": captured_at,
                    "compass_angle": compass_angle,
                    "exif_orientation": exif_orientation,
                    "image_url": filepath,
                    "thumb_url": thumb_url,
                    "geometry": geometry,
                }

                for column_info in additional_columns or []:
                    if isinstance(column_info, str):
                        image_data[column_info] = np.nan
                    elif isinstance(column_info, tuple):
                        col_name, exif_tag = column_info
                        image_data[col_name] = exif_data.get(exif_tag, np.nan)

                data.append(image_data)

    if valid_image_count == 0:
        raise ValueError("The directory does not contain any valid images")

    gif = GeoImageFrame(data, geometry="geometry")
    gif.set_crs(epsg=4326, inplace=True)
    return gif