Handlers API

Cloud Handler

Mapillary

Class to interact with Mapillary's API to fetch image data.

Parameters:

    mapillary_token (str): The authentication token for Mapillary. Required.

Examples:

>>> mapillary = Mapillary("YOUR_TOKEN_HERE")
>>> images = mapillary.fetch_within_bbox([12.34, 56.78, 90.12, 34.56])
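
A slightly fuller usage sketch, assuming the token is read from a hypothetical MAPILLARY_TOKEN environment variable and using a placeholder bounding box; this is an illustration, not part of the library:

import os

from landlensdb.handlers.cloud import Mapillary

# Assumption: the access token lives in an environment variable.
token = os.environ["MAPILLARY_TOKEN"]
mapillary = Mapillary(token)

# Fetch images for a small [west, south, east, north] bounding box.
images = mapillary.fetch_within_bbox([11.54, 48.14, 11.55, 48.15])
print(len(images), "images fetched")
print(images.columns.tolist())
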
Source code in landlensdb/handlers/cloud.py
class Mapillary:
    """
    Class to interact with Mapillary's API to fetch image data.

    Args:
        mapillary_token (str): The authentication token for Mapillary.

    Examples:
        >>> mapillary = Mapillary("YOUR_TOKEN_HERE")
        >>> images = mapillary.fetch_within_bbox([12.34, 56.78, 90.12, 34.56])
    """

    BASE_URL = "https://graph.mapillary.com"
    TILES_URL = "https://tiles.mapillary.com"
    REQUIRED_FIELDS = ["id", "geometry"]
    FIELDS_LIST = [
        "id",
        "altitude",
        "atomic_scale",
        "camera_parameters",
        "camera_type",
        "captured_at",
        "compass_angle",
        "computed_altitude",
        "computed_compass_angle",
        "computed_geometry",
        "computed_rotation",
        "exif_orientation",
        "geometry",
        "height",
        "thumb_1024_url",
        "merge_cc",
        "mesh",
        "sequence",
        "sfm_cluster",
        "width",
        "detections",
        "quality_score"  # Added quality score field
    ]

    QUALITY_INDICATORS = [
        "quality_score",  # Primary quality indicator
        "computed_compass_angle",  # Secondary indicator
        "atomic_scale"  # Tertiary indicator
    ]
    IMAGE_URL_KEYS = [
        "thumb_256_url",
        "thumb_1024_url",
        "thumb_2048_url",
        "thumb_original_url",
    ]
    LIMIT = 2000
    TF = TimezoneFinder()
    ZOOM_LEVEL = 14  # Default zoom level for coverage tiles

    def __init__(self, mapillary_token):
        """
        Initialize a Mapillary object.

        Args:
            mapillary_token (str): The authentication token for Mapillary.
        """
        self.TOKEN = mapillary_token

    def _validate_fields(self, fields):
        """
        Validates the fields for fetching data.

        Args:
            fields (list): The fields to be validated.

        Raises:
            ValueError: If the required fields are missing.
        """
        if (
            "id" not in fields
            or "geometry" not in fields
            or not any(image_field in fields for image_field in self.IMAGE_URL_KEYS)
        ):
            raise ValueError(
                "Fields must contain 'id', 'geometry', and one of "
                + str(self.IMAGE_URL_KEYS)
            )

    @staticmethod
    def _split_bbox(inner_bbox):
        """
        Splits a bounding box into four quarters.

        Args:
            inner_bbox (list): A list representing the bounding box to split.

        Returns:
            list: A list of four bounding boxes, each representing a quarter.
        """
        x1, y1, x2, y2 = inner_bbox[:]
        xm = (x2 - x1) / 2
        ym = (y2 - y1) / 2

        q1 = [x1, y1, x1 + xm, y1 + ym]
        q2 = [x1 + xm, y1, x2, y1 + ym]
        q3 = [x1, y1 + ym, x1 + xm, y2]
        q4 = [x1 + xm, y1 + ym, x2, y2]

        return [q1, q2, q3, q4]

    def _json_to_gdf(self, json_data):
        """
        Converts JSON data from Mapillary to a GeoDataFrame.

        Args:
            json_data (list): A list of JSON data from Mapillary.

        Returns:
            GeoDataFrame: A GeoDataFrame containing the image data.
        """
        # Early return if no data
        if not json_data:
            return GeoDataFrame(geometry=[])

        for img in json_data:
            # Basic field conversions
            coords = img.get("geometry", {}).get("coordinates", [None, None])
            img["geometry"] = Point(coords)
            img["mly_id"] = img.pop("id")
            img["name"] = f"mly|{img['mly_id']}"

            # Handle computed geometry
            if "computed_geometry" in img:
                coords = img.get("computed_geometry", {}).get(
                    "coordinates", [None, None]
                )
                img["computed_geometry"] = Point(coords)

            # Process timestamp with timezone
            if "captured_at" in img:
                lat = img["geometry"].y
                lng = img["geometry"].x
                img["captured_at"] = self._process_timestamp(
                    img.get("captured_at"), lat, lng
                )

            # Set image URL from available options
            image_url_found = False
            for key in self.IMAGE_URL_KEYS:
                if key in img:
                    img["image_url"] = str(img.pop(key))  # Explicitly convert to string
                    image_url_found = True
                    break

            # If no image URL was found, set a placeholder URL
            # Instead of using a direct Mapillary API URL that might fail,
            # we'll use a placeholder that indicates the image URL is missing
            if not image_url_found:
                img["image_url"] = f"placeholder://mapillary/{img['mly_id']}"

            # Convert list parameters to strings
            for key in ["camera_parameters", "computed_rotation"]:
                if key in img and isinstance(img[key], list):
                    img[key] = ",".join(map(str, img[key]))

            # Calculate quality score if not present
            if "quality_score" not in img:
                quality_score = 0.0
                if "computed_compass_angle" in img:
                    quality_score += 0.5  # Good compass data
                if "atomic_scale" in img:
                    quality_score += 0.3  # Good scale data
                if img.get("camera_type"):
                    quality_score += 0.2  # Camera type available
                img["quality_score"] = quality_score

        # Create GeoDataFrame
        gdf = GeoDataFrame(json_data, crs="EPSG:4326")
        gdf.set_geometry("geometry", inplace=True)

        # Sort by quality indicators and drop duplicates by sequence
        if "sequence" in gdf.columns:
            sort_columns = [col for col in self.QUALITY_INDICATORS if col in gdf.columns]
            if sort_columns:
                gdf = gdf.sort_values(sort_columns, ascending=False)
                gdf = gdf.drop_duplicates(subset=['sequence'], keep='first')

        # Ensure image_url is a string type
        if "image_url" in gdf.columns:
            gdf["image_url"] = gdf["image_url"].astype(str)

        return gdf

    def _bbox_to_tile_coords(self, bbox, zoom):
        """
        Convert a bounding box to tile coordinates at a given zoom level.

        Args:
            bbox (list): [west, south, east, north] coordinates
            zoom (int): Zoom level

        Returns:
            tuple: (min_x, min_y, max_x, max_y) tile coordinates
        """
        def lat_to_tile_y(lat_deg, zoom):
            lat_rad = math.radians(lat_deg)
            n = 2.0 ** zoom
            return int((1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * n)

        def lon_to_tile_x(lon_deg, zoom):
            n = 2.0 ** zoom
            return int((lon_deg + 180.0) / 360.0 * n)

        west, south, east, north = bbox
        min_x = lon_to_tile_x(west, zoom)
        max_x = lon_to_tile_x(east, zoom)
        min_y = lat_to_tile_y(north, zoom)  # Note: y coordinates are inverted
        max_y = lat_to_tile_y(south, zoom)

        return min_x, min_y, max_x, max_y

    def _tile_to_bbox(self, tile, zoom_level):
        """
        Converts tile coordinates to a bounding box.

        Args:
            tile (dict): Tile coordinates (x, y).
            zoom_level (int): The zoom level of the tile.

        Returns:
            list: Bounding box coordinates [west, south, east, north].
        """
        x, y = tile['x'], tile['y']
        n = 2.0 ** zoom_level
        west = x / n * 360.0 - 180.0
        east = (x + 1) / n * 360.0 - 180.0

        def inv_lat(y_tile):
            return math.degrees(math.atan(math.sinh(math.pi * (1 - 2 * y_tile / n))))

        north = inv_lat(y)
        south = inv_lat(y + 1)

        return [west, south, east, north]

    def _fetch_coverage_tile(self, zoom, x, y):
        """
        Fetches a single coverage tile.

        Args:
            zoom (int): Zoom level
            x (int): Tile X coordinate
            y (int): Tile Y coordinate

        Returns:
            list: Image features from the tile
        """
        url = (
            f"{self.TILES_URL}/maps/vtp/mly1_public/2"
            f"/{zoom}/{x}/{y}"
            f"?access_token={self.TOKEN}"
        )

        try:
            response = requests.get(url)
            if response.status_code == 200:
                # Vector tiles are binary, not JSON
                if 'application/x-protobuf' in response.headers.get('content-type', ''):
                    try:
                        # Decode the vector tile
                        tile_data = mapbox_vector_tile.decode(response.content)

                        # Check for image layer at zoom level 14
                        if 'image' in tile_data and zoom == 14:
                            return tile_data['image']['features']

                        # Check for sequence layer at zoom levels 6-14
                        elif 'sequence' in tile_data and 6 <= zoom <= 14:
                            return tile_data['sequence']['features']

                        # Check for overview layer at zoom levels 0-5
                        elif 'overview' in tile_data and 0 <= zoom <= 5:
                            return tile_data['overview']['features']

                        else:
                            warnings.warn(f"No usable layers found in tile {x},{y}")
                            return []

                    except Exception as e:
                        warnings.warn(f"Error decoding vector tile {x},{y}: {str(e)}")
                        return []
                else:
                    warnings.warn(f"Unexpected content type for tile {x},{y}")
                    return []
            else:
                warnings.warn(f"Error fetching tile {x},{y}: {response.status_code}")
                return []
        except Exception as e:
            warnings.warn(f"Exception fetching tile {x},{y}: {str(e)}")
            return []

    def _extract_image_ids_from_features(self, features):
        """
        Extracts image IDs from tile features.

        Args:
            features (list): List of features from a vector tile

        Returns:
            list: List of image IDs
        """
        image_ids = []

        for feature in features:
            if 'id' in feature.get('properties', {}):
                image_ids.append(str(feature['properties']['id']))
            elif 'image_id' in feature.get('properties', {}):
                image_ids.append(str(feature['properties']['image_id']))

        return image_ids

    def _fetch_image_metadata(self, image_ids, fields, max_workers=10):
        """
        Fetches metadata for multiple images using multi-threading.

        Args:
            image_ids (list): List of image IDs
            fields (list): Fields to include in the response
            max_workers (int, optional): Maximum number of concurrent workers. Default is 10.

        Returns:
            list: List of image metadata
        """
        results = []

        def fetch_single_image(image_id):
            url = (
                f"{self.BASE_URL}/{image_id}"
                f"?access_token={self.TOKEN}"
                f"&fields={','.join(fields)}"
            )

            try:
                response = requests.get(url)
                if response.status_code == 200:
                    return response.json()
                else:
                    warnings.warn(f"Error fetching image {image_id}: {response.status_code}")
                    return None
            except Exception as e:
                warnings.warn(f"Exception fetching image {image_id}: {str(e)}")
                return None

        # Use ThreadPoolExecutor for parallel fetching
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Submit all tasks and create a map of future to image_id
            future_to_id = {executor.submit(fetch_single_image, image_id): image_id
                           for image_id in image_ids}

            # Process results as they complete with a progress bar
            for future in tqdm(as_completed(future_to_id),
                              total=len(image_ids),
                              desc="Fetching metadata"):
                result = future.result()
                if result:
                    results.append(result)

        return results

    def fetch_within_bbox(
        self,
        initial_bbox,
        start_date=None,
        end_date=None,
        fields=None,
        max_recursion_depth=25,
        use_coverage_tiles=True,
        max_images=5000,
        max_workers=10
    ):
        """
        Fetches images within a bounding box.

        Args:
            initial_bbox (list): The bounding box to fetch images from [west, south, east, north].
            start_date (str, optional): Start date for filtering images (YYYY-MM-DD).
            end_date (str, optional): End date for filtering images (YYYY-MM-DD).
            fields (list, optional): Fields to include in the response.
            max_recursion_depth (int, optional): Maximum depth for recursive fetching.
            use_coverage_tiles (bool, optional): Whether to use coverage tiles API for large areas.
            max_images (int, optional): Maximum number of images to process. Default is 5000.
            max_workers (int, optional): Maximum number of concurrent workers. Default is 10.

        Returns:
            GeoImageFrame: A GeoImageFrame containing the image data.
        """
        if fields is None:
            fields = self.FIELDS_LIST

        # Ensure required fields are included
        if "id" not in fields:
            fields.append("id")
        if "geometry" not in fields:
            fields.append("geometry")
        if not any(url_key in fields for url_key in self.IMAGE_URL_KEYS):
            fields.append("thumb_1024_url")

        start_timestamp = self._get_timestamp(start_date) if start_date else None
        end_timestamp = self._get_timestamp(end_date, True) if end_date else None

        if use_coverage_tiles:
            # Get coverage tiles for the area
            min_x, min_y, max_x, max_y = self._bbox_to_tile_coords(initial_bbox, self.ZOOM_LEVEL)

            all_image_ids = []
            print(f"Fetching {(max_x - min_x + 1) * (max_y - min_y + 1)} tiles...")

            # Fetch all tiles in the bounding box
            for x in range(min_x, max_x + 1):
                for y in range(min_y, max_y + 1):
                    features = self._fetch_coverage_tile(self.ZOOM_LEVEL, x, y)
                    image_ids = self._extract_image_ids_from_features(features)
                    all_image_ids.extend(image_ids)

                    # Check if we've reached the maximum number of images
                    if len(all_image_ids) >= max_images * 2:  # Get more than needed to allow for filtering
                        print(f"Reached maximum number of images ({max_images}), stopping tile fetching")
                        break

                # Check again after processing a row of tiles
                if len(all_image_ids) >= max_images * 2:
                    break

            print(f"Found {len(all_image_ids)} total images")

            # Remove duplicates
            all_image_ids = list(set(all_image_ids))
            print(f"After removing duplicates: {len(all_image_ids)} unique images")

            # Limit the number of images to process
            if len(all_image_ids) > max_images:
                print(f"Limiting to {max_images} images for processing")
                all_image_ids = all_image_ids[:max_images]

            # Fetch metadata for all images using multi-threading
            all_data = self._fetch_image_metadata(all_image_ids, fields, max_workers=max_workers)

            data = self._json_to_gdf(all_data)
            return GeoImageFrame(data, geometry="geometry")
        else:
            # Use traditional recursive fetching
            data = self._recursive_fetch(
                initial_bbox,
                fields,
                start_timestamp,
                end_timestamp,
                max_recursion_depth=max_recursion_depth
            )
            gdf = self._json_to_gdf(data)
            return GeoImageFrame(gdf, geometry="geometry")

    def fetch_by_id(self, image_id, fields=None):
        """
        Fetches an image by its ID.

        Args:
            image_id (str): The ID of the image to fetch.
            fields (list, optional): The fields to include in the response.

        Returns:
            GeoImageFrame: A GeoImageFrame containing the fetched image.

        Raises:
            Exception: If the connection to Mapillary API fails.
        """
        if fields is None:
            fields = self.FIELDS_LIST
        else:
            self._validate_fields(fields)
        url = (
            f"{self.BASE_URL}/{image_id}"
            f"?access_token={self.TOKEN}"
            f"&fields={','.join(fields)}"
        )
        response = requests.get(url)
        if response.status_code != 200:
            raise Exception(
                f"Error connecting to Mapillary API. Exception: {response.text}"
            )
        data = self._json_to_gdf([response.json()])
        return GeoImageFrame(data, geometry="geometry")

    def fetch_by_sequence(self, sequence_ids, fields=None):
        """
        Fetches images by their sequence IDs.

        Args:
            sequence_ids (list): The sequence IDs to fetch images from.
            fields (list, optional): The fields to include in the response.

        Returns:
            GeoImageFrame: A GeoImageFrame containing the fetched images.

        Raises:
            Exception: If the connection to Mapillary API fails.
        """
        if fields is None:
            fields = self.FIELDS_LIST
        else:
            self._validate_fields(fields)
        url = (
            f"{self.BASE_URL}/images"
            f"?access_token={self.TOKEN}"
            f"&sequence_ids={','.join(sequence_ids)}"
            f"&fields={','.join(fields)}"
        )
        response = requests.get(url)
        if response.status_code != 200:
            raise Exception(
                f"Error connecting to Mapillary API. Exception: {response.text}"
            )
        response_data = response.json().get("data")
        if len(response_data) == self.LIMIT:
            raise Exception(
                "Data count reached the limit. Please provide fewer sequence IDs."
            )

        data = self._json_to_gdf(response_data)
        return GeoImageFrame(data, geometry="geometry")

    @staticmethod
    def _get_timestamp(date_string, end_of_day=False):
        """
        Converts a date string to a timestamp.

        Args:
            date_string (str): The date string to convert.
            end_of_day (bool, optional): Whether to set the timestamp to the end of the day.

        Returns:
            str: The timestamp corresponding to the date string.
        """
        if not date_string:
            return None

        tz = timezone.utc
        dt = datetime.strptime(date_string, "%Y-%m-%d")
        if end_of_day:
            dt = dt.replace(hour=23, minute=59, second=59)
        timestamp = (
            dt.astimezone(tz).replace(microsecond=0).isoformat().replace("+00:00", "Z")
        )
        return timestamp

    def _process_timestamp(self, epoch_time_ms, lat, lng):
        """
        Converts the given epoch time in milliseconds to an ISO-formatted timestamp adjusted to the local timezone
        based on the provided latitude and longitude coordinates.

        Args:
            epoch_time_ms (int): Epoch time in milliseconds.
            lat (float): Latitude coordinate for the timezone conversion.
            lng (float): Longitude coordinate for the timezone conversion.

        Returns:
            str: An ISO-formatted timestamp in the local timezone if timezone information is found, otherwise in UTC.

        Example:
            >>> _process_timestamp(1630456103000, 37.7749, -122.4194)
            '2021-09-01T09:55:03-07:00'
        """
        if not epoch_time_ms:
            return None
        epoch_time = epoch_time_ms / 1000
        dt_utc = datetime.fromtimestamp(epoch_time, tz=timezone.utc)

        tz_name = self.TF.timezone_at(lat=lat, lng=lng)
        if tz_name:
            local_tz = pytz.timezone(tz_name)
            return dt_utc.astimezone(local_tz).isoformat()
        else:
            return dt_utc.isoformat()

    def _recursive_fetch(
        self,
        bbox,
        fields,
        start_timestamp=None,
        end_timestamp=None,
        current_depth=0,
        max_recursion_depth=None,
    ):
        """
        Recursively fetches images within a bounding box, considering timestamps.

        Args:
            bbox (list): The bounding box to fetch images from.
            fields (list): The fields to include in the response.
            start_timestamp (str, optional): The starting timestamp for filtering images.
            end_timestamp (str, optional): The ending timestamp for filtering images.
            current_depth (int, optional): Current depth of recursion.
            max_recursion_depth (int, optional): Maximum depth of recursion.

        Returns:
            list: A list of image data.

        Raises:
            Exception: If the connection to Mapillary API fails.
        """
        if max_recursion_depth is not None and current_depth > max_recursion_depth:
            warnings.warn(
                "Max recursion depth reached. Consider splitting requests."
            )
            return []

        url = (
            f"{self.BASE_URL}/images"
            f"?access_token={self.TOKEN}"
            f"&fields={','.join(fields)}"
            f"&bbox={','.join(str(i) for i in bbox)}"
            f"&limit={self.LIMIT}"
        )

        if start_timestamp:
            url += f"&start_captured_at={start_timestamp}"
        if end_timestamp:
            url += f"&end_captured_at={end_timestamp}"

        response = requests.get(url)
        if response.status_code != 200:
            raise Exception(
                f"Error connecting to Mapillary API. Exception: {response.text}"
            )

        response_data = response.json().get("data")
        if len(response_data) == self.LIMIT:
            child_bboxes = self._split_bbox(bbox)
            data = []
            for child_bbox in child_bboxes:
                data.extend(
                    self._recursive_fetch(
                        child_bbox,
                        fields,
                        start_timestamp,
                        end_timestamp,
                        current_depth=current_depth + 1,
                        max_recursion_depth=max_recursion_depth,
                    )
                )
            return data
        else:
            return response_data

__init__(mapillary_token)

Initialize a Mapillary object.

Parameters:

    mapillary_token (str): The authentication token for Mapillary. Required.

Source code in landlensdb/handlers/cloud.py
def __init__(self, mapillary_token):
    """
    Initialize a Mapillary object.

    Args:
        mapillary_token (str): The authentication token for Mapillary.
    """
    self.TOKEN = mapillary_token

_bbox_to_tile_coords(bbox, zoom)

Convert a bounding box to tile coordinates at a given zoom level.

Parameters:

    bbox (list): [west, south, east, north] coordinates. Required.
    zoom (int): Zoom level. Required.

Returns:

    tuple: (min_x, min_y, max_x, max_y) tile coordinates.

Source code in landlensdb/handlers/cloud.py
def _bbox_to_tile_coords(self, bbox, zoom):
    """
    Convert a bounding box to tile coordinates at a given zoom level.

    Args:
        bbox (list): [west, south, east, north] coordinates
        zoom (int): Zoom level

    Returns:
        tuple: (min_x, min_y, max_x, max_y) tile coordinates
    """
    def lat_to_tile_y(lat_deg, zoom):
        lat_rad = math.radians(lat_deg)
        n = 2.0 ** zoom
        return int((1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * n)

    def lon_to_tile_x(lon_deg, zoom):
        n = 2.0 ** zoom
        return int((lon_deg + 180.0) / 360.0 * n)

    west, south, east, north = bbox
    min_x = lon_to_tile_x(west, zoom)
    max_x = lon_to_tile_x(east, zoom)
    min_y = lat_to_tile_y(north, zoom)  # Note: y coordinates are inverted
    max_y = lat_to_tile_y(south, zoom)

    return min_x, min_y, max_x, max_y
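
For intuition, here is a standalone sketch of the same XYZ ("slippy map") tile arithmetic that this helper implements, run on a small placeholder bounding box:

import math

def lon_to_tile_x(lon_deg, zoom):
    # Standard XYZ tile column index.
    return int((lon_deg + 180.0) / 360.0 * 2.0 ** zoom)

def lat_to_tile_y(lat_deg, zoom):
    # Standard XYZ tile row index (rows grow towards the south).
    lat_rad = math.radians(lat_deg)
    return int((1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * 2.0 ** zoom)

# A small bbox around central Berlin, [west, south, east, north].
west, south, east, north = 13.37, 52.51, 13.41, 52.53
zoom = 14
min_x, max_x = lon_to_tile_x(west, zoom), lon_to_tile_x(east, zoom)
min_y, max_y = lat_to_tile_y(north, zoom), lat_to_tile_y(south, zoom)
print((max_x - min_x + 1) * (max_y - min_y + 1), "tiles cover the bbox at zoom", zoom)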

_extract_image_ids_from_features(features)

Extracts image IDs from tile features.

Parameters:

    features (list): List of features from a vector tile. Required.

Returns:

    list: List of image IDs.

Source code in landlensdb/handlers/cloud.py
def _extract_image_ids_from_features(self, features):
    """
    Extracts image IDs from tile features.

    Args:
        features (list): List of features from a vector tile

    Returns:
        list: List of image IDs
    """
    image_ids = []

    for feature in features:
        if 'id' in feature.get('properties', {}):
            image_ids.append(str(feature['properties']['id']))
        elif 'image_id' in feature.get('properties', {}):
            image_ids.append(str(feature['properties']['image_id']))

    return image_ids

_fetch_coverage_tile(zoom, x, y)

Fetches a single coverage tile.

Parameters:

    zoom (int): Zoom level. Required.
    x (int): Tile X coordinate. Required.
    y (int): Tile Y coordinate. Required.

Returns:

    list: Image features from the tile.

Source code in landlensdb/handlers/cloud.py
def _fetch_coverage_tile(self, zoom, x, y):
    """
    Fetches a single coverage tile.

    Args:
        zoom (int): Zoom level
        x (int): Tile X coordinate
        y (int): Tile Y coordinate

    Returns:
        list: Image features from the tile
    """
    url = (
        f"{self.TILES_URL}/maps/vtp/mly1_public/2"
        f"/{zoom}/{x}/{y}"
        f"?access_token={self.TOKEN}"
    )

    try:
        response = requests.get(url)
        if response.status_code == 200:
            # Vector tiles are binary, not JSON
            if 'application/x-protobuf' in response.headers.get('content-type', ''):
                try:
                    # Decode the vector tile
                    tile_data = mapbox_vector_tile.decode(response.content)

                    # Check for image layer at zoom level 14
                    if 'image' in tile_data and zoom == 14:
                        return tile_data['image']['features']

                    # Check for sequence layer at zoom levels 6-14
                    elif 'sequence' in tile_data and 6 <= zoom <= 14:
                        return tile_data['sequence']['features']

                    # Check for overview layer at zoom levels 0-5
                    elif 'overview' in tile_data and 0 <= zoom <= 5:
                        return tile_data['overview']['features']

                    else:
                        warnings.warn(f"No usable layers found in tile {x},{y}")
                        return []

                except Exception as e:
                    warnings.warn(f"Error decoding vector tile {x},{y}: {str(e)}")
                    return []
            else:
                warnings.warn(f"Unexpected content type for tile {x},{y}")
                return []
        else:
            warnings.warn(f"Error fetching tile {x},{y}: {response.status_code}")
            return []
    except Exception as e:
        warnings.warn(f"Exception fetching tile {x},{y}: {str(e)}")
        return []
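
A minimal sketch of the decoding step in isolation, assuming the raw protobuf body of a coverage tile is already in hand; the 'image', 'sequence', and 'overview' layer names are the ones checked above:

import mapbox_vector_tile

def list_tile_layers(tile_bytes):
    # Decode the Mapbox Vector Tile into a dict keyed by layer name.
    tile_data = mapbox_vector_tile.decode(tile_bytes)
    # Each layer holds a list of features, each with 'geometry' and 'properties'.
    for layer_name, layer in tile_data.items():
        print(layer_name, len(layer.get("features", [])), "features")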

_fetch_image_metadata(image_ids, fields, max_workers=10)

Fetches metadata for multiple images using multi-threading.

Parameters:

    image_ids (list): List of image IDs. Required.
    fields (list): Fields to include in the response. Required.
    max_workers (int): Maximum number of concurrent workers. Default: 10.

Returns:

    list: List of image metadata.

Source code in landlensdb/handlers/cloud.py
def _fetch_image_metadata(self, image_ids, fields, max_workers=10):
    """
    Fetches metadata for multiple images using multi-threading.

    Args:
        image_ids (list): List of image IDs
        fields (list): Fields to include in the response
        max_workers (int, optional): Maximum number of concurrent workers. Default is 10.

    Returns:
        list: List of image metadata
    """
    results = []

    def fetch_single_image(image_id):
        url = (
            f"{self.BASE_URL}/{image_id}"
            f"?access_token={self.TOKEN}"
            f"&fields={','.join(fields)}"
        )

        try:
            response = requests.get(url)
            if response.status_code == 200:
                return response.json()
            else:
                warnings.warn(f"Error fetching image {image_id}: {response.status_code}")
                return None
        except Exception as e:
            warnings.warn(f"Exception fetching image {image_id}: {str(e)}")
            return None

    # Use ThreadPoolExecutor for parallel fetching
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit all tasks and create a map of future to image_id
        future_to_id = {executor.submit(fetch_single_image, image_id): image_id
                       for image_id in image_ids}

        # Process results as they complete with a progress bar
        for future in tqdm(as_completed(future_to_id),
                          total=len(image_ids),
                          desc="Fetching metadata"):
            result = future.result()
            if result:
                results.append(result)

    return results

_get_timestamp(date_string, end_of_day=False) staticmethod

Converts a date string to a timestamp.

Parameters:

    date_string (str): The date string to convert. Required.
    end_of_day (bool): Whether to set the timestamp to the end of the day. Default: False.

Returns:

    str: The timestamp corresponding to the date string.

Source code in landlensdb/handlers/cloud.py
@staticmethod
def _get_timestamp(date_string, end_of_day=False):
    """
    Converts a date string to a timestamp.

    Args:
        date_string (str): The date string to convert.
        end_of_day (bool, optional): Whether to set the timestamp to the end of the day.

    Returns:
        str: The timestamp corresponding to the date string.
    """
    if not date_string:
        return None

    tz = timezone.utc
    dt = datetime.strptime(date_string, "%Y-%m-%d")
    if end_of_day:
        dt = dt.replace(hour=23, minute=59, second=59)
    timestamp = (
        dt.astimezone(tz).replace(microsecond=0).isoformat().replace("+00:00", "Z")
    )
    return timestamp
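
Example output on a machine whose local timezone is UTC (the parsed datetime is naive, so astimezone() interprets it in the system's local timezone before converting; on other machines the time component shifts accordingly):

    >>> Mapillary._get_timestamp("2023-05-01")
    '2023-05-01T00:00:00Z'
    >>> Mapillary._get_timestamp("2023-05-01", end_of_day=True)
    '2023-05-01T23:59:59Z'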

_json_to_gdf(json_data)

Converts JSON data from Mapillary to a GeoDataFrame.

Parameters:

    json_data (list): A list of JSON data from Mapillary. Required.

Returns:

    GeoDataFrame: A GeoDataFrame containing the image data.

Source code in landlensdb/handlers/cloud.py
def _json_to_gdf(self, json_data):
    """
    Converts JSON data from Mapillary to a GeoDataFrame.

    Args:
        json_data (list): A list of JSON data from Mapillary.

    Returns:
        GeoDataFrame: A GeoDataFrame containing the image data.
    """
    # Early return if no data
    if not json_data:
        return GeoDataFrame(geometry=[])

    for img in json_data:
        # Basic field conversions
        coords = img.get("geometry", {}).get("coordinates", [None, None])
        img["geometry"] = Point(coords)
        img["mly_id"] = img.pop("id")
        img["name"] = f"mly|{img['mly_id']}"

        # Handle computed geometry
        if "computed_geometry" in img:
            coords = img.get("computed_geometry", {}).get(
                "coordinates", [None, None]
            )
            img["computed_geometry"] = Point(coords)

        # Process timestamp with timezone
        if "captured_at" in img:
            lat = img["geometry"].y
            lng = img["geometry"].x
            img["captured_at"] = self._process_timestamp(
                img.get("captured_at"), lat, lng
            )

        # Set image URL from available options
        image_url_found = False
        for key in self.IMAGE_URL_KEYS:
            if key in img:
                img["image_url"] = str(img.pop(key))  # Explicitly convert to string
                image_url_found = True
                break

        # If no image URL was found, set a placeholder URL
        # Instead of using a direct Mapillary API URL that might fail,
        # we'll use a placeholder that indicates the image URL is missing
        if not image_url_found:
            img["image_url"] = f"placeholder://mapillary/{img['mly_id']}"

        # Convert list parameters to strings
        for key in ["camera_parameters", "computed_rotation"]:
            if key in img and isinstance(img[key], list):
                img[key] = ",".join(map(str, img[key]))

        # Calculate quality score if not present
        if "quality_score" not in img:
            quality_score = 0.0
            if "computed_compass_angle" in img:
                quality_score += 0.5  # Good compass data
            if "atomic_scale" in img:
                quality_score += 0.3  # Good scale data
            if img.get("camera_type"):
                quality_score += 0.2  # Camera type available
            img["quality_score"] = quality_score

    # Create GeoDataFrame
    gdf = GeoDataFrame(json_data, crs="EPSG:4326")
    gdf.set_geometry("geometry", inplace=True)

    # Sort by quality indicators and drop duplicates by sequence
    if "sequence" in gdf.columns:
        sort_columns = [col for col in self.QUALITY_INDICATORS if col in gdf.columns]
        if sort_columns:
            gdf = gdf.sort_values(sort_columns, ascending=False)
            gdf = gdf.drop_duplicates(subset=['sequence'], keep='first')

    # Ensure image_url is a string type
    if "image_url" in gdf.columns:
        gdf["image_url"] = gdf["image_url"].astype(str)

    return gdf
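
For illustration only, a single minimal record run through this conversion offline (no network access is needed; the field values are made up, and calling the private method directly is just for demonstration):

from landlensdb.handlers.cloud import Mapillary

record = {
    "id": "123456789",
    "geometry": {"type": "Point", "coordinates": [13.40, 52.52]},
    "thumb_1024_url": "https://example.com/thumb.jpg",
    "camera_type": "perspective",
}
gdf = Mapillary("DUMMY_TOKEN")._json_to_gdf([record])
# Expect mly_id, name, image_url and a derived quality_score alongside the Point geometry.
print(gdf[["mly_id", "name", "image_url", "quality_score"]])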

_process_timestamp(epoch_time_ms, lat, lng)

Converts the given epoch time in milliseconds to an ISO-formatted timestamp adjusted to the local timezone based on the provided latitude and longitude coordinates.

Parameters:

    epoch_time_ms (int): Epoch time in milliseconds. Required.
    lat (float): Latitude coordinate for the timezone conversion. Required.
    lng (float): Longitude coordinate for the timezone conversion. Required.

Returns:

    str: An ISO-formatted timestamp in the local timezone if timezone information is found, otherwise in UTC.

Example:

    >>> _process_timestamp(1630456103000, 37.7749, -122.4194)
    '2021-09-01T09:55:03-07:00'

Source code in landlensdb/handlers/cloud.py
def _process_timestamp(self, epoch_time_ms, lat, lng):
    """
    Converts the given epoch time in milliseconds to an ISO-formatted timestamp adjusted to the local timezone
    based on the provided latitude and longitude coordinates.

    Args:
        epoch_time_ms (int): Epoch time in milliseconds.
        lat (float): Latitude coordinate for the timezone conversion.
        lng (float): Longitude coordinate for the timezone conversion.

    Returns:
        str: An ISO-formatted timestamp in the local timezone if timezone information is found, otherwise in UTC.

    Example:
        >>> _process_timestamp(1630456103000, 37.7749, -122.4194)
        '2021-09-01T09:55:03-07:00'
    """
    if not epoch_time_ms:
        return None
    epoch_time = epoch_time_ms / 1000
    dt_utc = datetime.fromtimestamp(epoch_time, tz=timezone.utc)

    tz_name = self.TF.timezone_at(lat=lat, lng=lng)
    if tz_name:
        local_tz = pytz.timezone(tz_name)
        return dt_utc.astimezone(local_tz).isoformat()
    else:
        return dt_utc.isoformat()

_recursive_fetch(bbox, fields, start_timestamp=None, end_timestamp=None, current_depth=0, max_recursion_depth=None)

Recursively fetches images within a bounding box, considering timestamps.

Parameters:

    bbox (list): The bounding box to fetch images from. Required.
    fields (list): The fields to include in the response. Required.
    start_timestamp (str): The starting timestamp for filtering images. Default: None.
    end_timestamp (str): The ending timestamp for filtering images. Default: None.
    current_depth (int): Current depth of recursion. Default: 0.
    max_recursion_depth (int): Maximum depth of recursion. Default: None.

Returns:

    list: A list of image data.

Raises:

    Exception: If the connection to Mapillary API fails.

Source code in landlensdb/handlers/cloud.py
def _recursive_fetch(
    self,
    bbox,
    fields,
    start_timestamp=None,
    end_timestamp=None,
    current_depth=0,
    max_recursion_depth=None,
):
    """
    Recursively fetches images within a bounding box, considering timestamps.

    Args:
        bbox (list): The bounding box to fetch images from.
        fields (list): The fields to include in the response.
        start_timestamp (str, optional): The starting timestamp for filtering images.
        end_timestamp (str, optional): The ending timestamp for filtering images.
        current_depth (int, optional): Current depth of recursion.
        max_recursion_depth (int, optional): Maximum depth of recursion.

    Returns:
        list: A list of image data.

    Raises:
        Exception: If the connection to Mapillary API fails.
    """
    if max_recursion_depth is not None and current_depth > max_recursion_depth:
        warnings.warn(
            "Max recursion depth reached. Consider splitting requests."
        )
        return []

    url = (
        f"{self.BASE_URL}/images"
        f"?access_token={self.TOKEN}"
        f"&fields={','.join(fields)}"
        f"&bbox={','.join(str(i) for i in bbox)}"
        f"&limit={self.LIMIT}"
    )

    if start_timestamp:
        url += f"&start_captured_at={start_timestamp}"
    if end_timestamp:
        url += f"&end_captured_at={end_timestamp}"

    response = requests.get(url)
    if response.status_code != 200:
        raise Exception(
            f"Error connecting to Mapillary API. Exception: {response.text}"
        )

    response_data = response.json().get("data")
    if len(response_data) == self.LIMIT:
        child_bboxes = self._split_bbox(bbox)
        data = []
        for child_bbox in child_bboxes:
            data.extend(
                self._recursive_fetch(
                    child_bbox,
                    fields,
                    start_timestamp,
                    end_timestamp,
                    current_depth=current_depth + 1,
                    max_recursion_depth=max_recursion_depth,
                )
            )
        return data
    else:
        return response_data

_split_bbox(inner_bbox) staticmethod

Splits a bounding box into four quarters.

Parameters:

    inner_bbox (list): A list representing the bounding box to split. Required.

Returns:

    list: A list of four bounding boxes, each representing a quarter.

Source code in landlensdb/handlers/cloud.py
@staticmethod
def _split_bbox(inner_bbox):
    """
    Splits a bounding box into four quarters.

    Args:
        inner_bbox (list): A list representing the bounding box to split.

    Returns:
        list: A list of four bounding boxes, each representing a quarter.
    """
    x1, y1, x2, y2 = inner_bbox[:]
    xm = (x2 - x1) / 2
    ym = (y2 - y1) / 2

    q1 = [x1, y1, x1 + xm, y1 + ym]
    q2 = [x1 + xm, y1, x2, y1 + ym]
    q3 = [x1, y1 + ym, x1 + xm, y2]
    q4 = [x1 + xm, y1 + ym, x2, y2]

    return [q1, q2, q3, q4]
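
A quick check of the quartering on a simple box (illustration of the private helper only):

    >>> Mapillary._split_bbox([0.0, 0.0, 2.0, 2.0])
    [[0.0, 0.0, 1.0, 1.0], [1.0, 0.0, 2.0, 1.0], [0.0, 1.0, 1.0, 2.0], [1.0, 1.0, 2.0, 2.0]]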

_tile_to_bbox(tile, zoom_level)

Converts tile coordinates to a bounding box.

Parameters:

    tile (dict): Tile coordinates (x, y). Required.
    zoom_level (int): The zoom level of the tile. Required.

Returns:

    list: Bounding box coordinates [west, south, east, north].

Source code in landlensdb/handlers/cloud.py
def _tile_to_bbox(self, tile, zoom_level):
    """
    Converts tile coordinates to a bounding box.

    Args:
        tile (dict): Tile coordinates (x, y).
        zoom_level (int): The zoom level of the tile.

    Returns:
        list: Bounding box coordinates [west, south, east, north].
    """
    x, y = tile['x'], tile['y']
    n = 2.0 ** zoom_level
    west = x / n * 360.0 - 180.0
    east = (x + 1) / n * 360.0 - 180.0

    def inv_lat(y_tile):
        return math.degrees(math.atan(math.sinh(math.pi * (1 - 2 * y_tile / n))))

    north = inv_lat(y)
    south = inv_lat(y + 1)

    return [west, south, east, north]
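
As a sanity check, the top-left tile at zoom 1 covers the north-western quarter of the Web Mercator world, roughly [-180, 0, 0, 85.0511] (illustration of the private helper only; the token is a dummy):

from landlensdb.handlers.cloud import Mapillary

bbox = Mapillary("DUMMY_TOKEN")._tile_to_bbox({"x": 0, "y": 0}, 1)
print([round(v, 4) for v in bbox])  # [-180.0, 0.0, 0.0, 85.0511]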

_validate_fields(fields)

Validates the fields for fetching data.

Parameters:

    fields (list): The fields to be validated. Required.

Raises:

    ValueError: If the required fields are missing.

Source code in landlensdb/handlers/cloud.py
def _validate_fields(self, fields):
    """
    Validates the fields for fetching data.

    Args:
        fields (list): The fields to be validated.

    Raises:
        ValueError: If the required fields are missing.
    """
    if (
        "id" not in fields
        or "geometry" not in fields
        or not any(image_field in fields for image_field in self.IMAGE_URL_KEYS)
    ):
        raise ValueError(
            "Fields must contain 'id', 'geometry', and one of "
            + str(self.IMAGE_URL_KEYS)
        )
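
For example, a field list passes validation only if it names 'id', 'geometry', and at least one of the thumbnail URL keys (illustration only; the token is a dummy):

from landlensdb.handlers.cloud import Mapillary

m = Mapillary("DUMMY_TOKEN")
m._validate_fields(["id", "geometry", "thumb_1024_url"])  # passes silently

try:
    m._validate_fields(["id", "geometry"])  # no thumbnail URL key present
except ValueError as err:
    print(err)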

fetch_by_id(image_id, fields=None)

Fetches an image by its ID.

Parameters:

    image_id (str): The ID of the image to fetch. Required.
    fields (list): The fields to include in the response. Default: None.

Returns:

    GeoImageFrame: A GeoImageFrame containing the fetched image.

Raises:

    Exception: If the connection to Mapillary API fails.

Source code in landlensdb/handlers/cloud.py
def fetch_by_id(self, image_id, fields=None):
    """
    Fetches an image by its ID.

    Args:
        image_id (str): The ID of the image to fetch.
        fields (list, optional): The fields to include in the response.

    Returns:
        GeoImageFrame: A GeoImageFrame containing the fetched image.

    Raises:
        Exception: If the connection to Mapillary API fails.
    """
    if fields is None:
        fields = self.FIELDS_LIST
    else:
        self._validate_fields(fields)
    url = (
        f"{self.BASE_URL}/{image_id}"
        f"?access_token={self.TOKEN}"
        f"&fields={','.join(fields)}"
    )
    response = requests.get(url)
    if response.status_code != 200:
        raise Exception(
            f"Error connecting to Mapillary API. Exception: {response.text}"
        )
    data = self._json_to_gdf([response.json()])
    return GeoImageFrame(data, geometry="geometry")
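
A hedged usage sketch; the image ID is a placeholder and the call performs a live API request:

mapillary = Mapillary("YOUR_TOKEN_HERE")
frame = mapillary.fetch_by_id("123456789012345")
print(frame[["mly_id", "image_url"]])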

fetch_by_sequence(sequence_ids, fields=None)

Fetches images by their sequence IDs.

Parameters:

    sequence_ids (list): The sequence IDs to fetch images from. Required.
    fields (list): The fields to include in the response. Default: None.

Returns:

    GeoImageFrame: A GeoImageFrame containing the fetched images.

Raises:

    Exception: If the connection to Mapillary API fails.

Source code in landlensdb/handlers/cloud.py
def fetch_by_sequence(self, sequence_ids, fields=None):
    """
    Fetches images by their sequence IDs.

    Args:
        sequence_ids (list): The sequence IDs to fetch images from.
        fields (list, optional): The fields to include in the response.

    Returns:
        GeoImageFrame: A GeoImageFrame containing the fetched images.

    Raises:
        Exception: If the connection to Mapillary API fails.
    """
    if fields is None:
        fields = self.FIELDS_LIST
    else:
        self._validate_fields(fields)
    url = (
        f"{self.BASE_URL}/images"
        f"?access_token={self.TOKEN}"
        f"&sequence_ids={','.join(sequence_ids)}"
        f"&fields={','.join(fields)}"
    )
    response = requests.get(url)
    if response.status_code != 200:
        raise Exception(
            f"Error connecting to Mapillary API. Exception: {response.text}"
        )
    response_data = response.json().get("data")
    if len(response_data) == self.LIMIT:
        raise Exception(
            "Data count reached the limit. Please provide fewer sequence IDs."
        )

    data = self._json_to_gdf(response_data)
    return GeoImageFrame(data, geometry="geometry")
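
A hedged usage sketch; the sequence IDs are placeholders and the call performs a live API request:

mapillary = Mapillary("YOUR_TOKEN_HERE")
frame = mapillary.fetch_by_sequence(["SEQUENCE_ID_1", "SEQUENCE_ID_2"])
print(frame[["mly_id", "sequence", "image_url"]].head())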

fetch_within_bbox(initial_bbox, start_date=None, end_date=None, fields=None, max_recursion_depth=25, use_coverage_tiles=True, max_images=5000, max_workers=10)

Fetches images within a bounding box.

Parameters:

    initial_bbox (list): The bounding box to fetch images from [west, south, east, north]. Required.
    start_date (str): Start date for filtering images (YYYY-MM-DD). Default: None.
    end_date (str): End date for filtering images (YYYY-MM-DD). Default: None.
    fields (list): Fields to include in the response. Default: None.
    max_recursion_depth (int): Maximum depth for recursive fetching. Default: 25.
    use_coverage_tiles (bool): Whether to use coverage tiles API for large areas. Default: True.
    max_images (int): Maximum number of images to process. Default: 5000.
    max_workers (int): Maximum number of concurrent workers. Default: 10.

Returns:

    GeoImageFrame: A GeoImageFrame containing the image data.

Source code in landlensdb/handlers/cloud.py
def fetch_within_bbox(
    self,
    initial_bbox,
    start_date=None,
    end_date=None,
    fields=None,
    max_recursion_depth=25,
    use_coverage_tiles=True,
    max_images=5000,
    max_workers=10
):
    """
    Fetches images within a bounding box.

    Args:
        initial_bbox (list): The bounding box to fetch images from [west, south, east, north].
        start_date (str, optional): Start date for filtering images (YYYY-MM-DD).
        end_date (str, optional): End date for filtering images (YYYY-MM-DD).
        fields (list, optional): Fields to include in the response.
        max_recursion_depth (int, optional): Maximum depth for recursive fetching.
        use_coverage_tiles (bool, optional): Whether to use coverage tiles API for large areas.
        max_images (int, optional): Maximum number of images to process. Default is 5000.
        max_workers (int, optional): Maximum number of concurrent workers. Default is 10.

    Returns:
        GeoImageFrame: A GeoImageFrame containing the image data.
    """
    if fields is None:
        fields = self.FIELDS_LIST

    # Ensure required fields are included
    if "id" not in fields:
        fields.append("id")
    if "geometry" not in fields:
        fields.append("geometry")
    if not any(url_key in fields for url_key in self.IMAGE_URL_KEYS):
        fields.append("thumb_1024_url")

    start_timestamp = self._get_timestamp(start_date) if start_date else None
    end_timestamp = self._get_timestamp(end_date, True) if end_date else None

    if use_coverage_tiles:
        # Get coverage tiles for the area
        min_x, min_y, max_x, max_y = self._bbox_to_tile_coords(initial_bbox, self.ZOOM_LEVEL)

        all_image_ids = []
        print(f"Fetching {(max_x - min_x + 1) * (max_y - min_y + 1)} tiles...")

        # Fetch all tiles in the bounding box
        for x in range(min_x, max_x + 1):
            for y in range(min_y, max_y + 1):
                features = self._fetch_coverage_tile(self.ZOOM_LEVEL, x, y)
                image_ids = self._extract_image_ids_from_features(features)
                all_image_ids.extend(image_ids)

                # Check if we've reached the maximum number of images
                if len(all_image_ids) >= max_images * 2:  # Get more than needed to allow for filtering
                    print(f"Reached maximum number of images ({max_images}), stopping tile fetching")
                    break

            # Check again after processing a row of tiles
            if len(all_image_ids) >= max_images * 2:
                break

        print(f"Found {len(all_image_ids)} total images")

        # Remove duplicates
        all_image_ids = list(set(all_image_ids))
        print(f"After removing duplicates: {len(all_image_ids)} unique images")

        # Limit the number of images to process
        if len(all_image_ids) > max_images:
            print(f"Limiting to {max_images} images for processing")
            all_image_ids = all_image_ids[:max_images]

        # Fetch metadata for all images using multi-threading
        all_data = self._fetch_image_metadata(all_image_ids, fields, max_workers=max_workers)

        data = self._json_to_gdf(all_data)
        return GeoImageFrame(data, geometry="geometry")
    else:
        # Use traditional recursive fetching
        data = self._recursive_fetch(
            initial_bbox,
            fields,
            start_timestamp,
            end_timestamp,
            max_recursion_depth=max_recursion_depth
        )
        gdf = self._json_to_gdf(data)
        return GeoImageFrame(gdf, geometry="geometry")
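
A hedged end-to-end sketch combining the date window with the recursive path. Note that, as written above, start_timestamp and end_timestamp are only passed to _recursive_fetch, so the date filter takes effect when use_coverage_tiles=False:

mapillary = Mapillary("YOUR_TOKEN_HERE")

# Recursive bbox fetching with a capture-date window (placeholder bbox and dates).
frame = mapillary.fetch_within_bbox(
    [11.54, 48.14, 11.55, 48.15],
    start_date="2023-01-01",
    end_date="2023-12-31",
    use_coverage_tiles=False,
)
print(len(frame), "images")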

DB Handler

Postgres

A class for managing image-related postgres database operations.

Attributes:

    DATABASE_URL (str): The URL of the database to connect to.
    engine (Engine): SQLAlchemy engine for database connections.
    result_set (ResultProxy): The result of the last query executed.
    selected_table (Table): The table object for query operations.
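
A hedged usage sketch based on the methods shown below; the connection URL, table name, and column name are placeholders:

from landlensdb.handlers.db import Postgres

db = Postgres("postgresql://user:password@localhost:5432/landlens")
frame = (
    db.table("mapillary_images")
      .filter(captured_at__gte="2023-01-01")
      .all()
)
print(len(frame), "rows returned")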

Source code in landlensdb/handlers/db.py
class Postgres:
    """
    A class for managing image-related postgres database operations.

    Attributes:
        DATABASE_URL (str): The URL of the database to connect to.
        engine (Engine): SQLAlchemy engine for database connections.
        result_set (ResultProxy): The result of the last query executed.
        selected_table (Table): The table object for query operations.
    """

    def __init__(self, database_url):
        """
        Initializes the ImageDB class with the given database URL.

        Args:
            database_url (str): The URL of the database to connect to.
        """
        self.DATABASE_URL = database_url
        self.engine = create_engine(self.DATABASE_URL)
        self.result_set = None
        self.selected_table = None

    @staticmethod
    def _convert_points_to_wkt(record):
        """
        Converts Point objects to WKT (Well-Known Text) format.

        Args:
            record (dict): A dictionary containing keys and values, where values can be Point objects.

        Returns:
            dict: The record with Point objects converted to WKT strings.
        """
        for key, value in record.items():
            if isinstance(value, Point):
                record[key] = value.wkt
        return record

    @staticmethod
    def _convert_dicts_to_json(record):
        """
        Converts dictionary values in a record to JSON strings.

        Args:
            record (dict): A dictionary where values may include other dictionaries.

        Returns:
            dict: The modified record with dict values converted to JSON strings.
        """
        for key, value in record.items():
            if isinstance(value, dict):
                record[key] = json.dumps(value)
        return record

    def table(self, table_name):
        """
        Selects a table for performing queries on.

        Args:
            table_name (str): Name of the table to select.

        Returns:
            Postgres: Returns self to enable method chaining.
        """
        metadata = MetaData()
        self.selected_table = Table(table_name, metadata, autoload_with=self.engine)
        self.result_set = self.selected_table.select()
        return self

    def filter(self, **kwargs):
        """
        Applies filters to the selected table based on provided conditions.

        Args:
            **kwargs: Key-value pairs representing filters to apply.

        Returns:
            Postgres: Returns self to enable method chaining.

        Raises:
            ValueError: If an unsupported operation or a nonexistent column is specified.
        """
        filters = []

        for k, v in kwargs.items():
            if "__" in k:
                field_name, operation = k.split("__", 1)
            else:
                field_name = k
                operation = "eq"

            column = getattr(self.selected_table.columns, field_name, None)
            if column is None:
                raise ValueError(
                    f"Column '{field_name}' not found in table '{self.selected_table.name}'"
                )

            if operation == "eq":
                filters.append(column == v)
            elif operation == "gt":
                filters.append(column > v)
            elif operation == "lt":
                filters.append(column < v)
            elif operation == "gte":
                filters.append(column >= v)
            elif operation == "lte":
                filters.append(column <= v)
            else:
                raise ValueError(f"Unsupported operation '{operation}'")

        self.result_set = self.result_set.where(and_(*filters))
        return self

    def all(self):
        """
        Executes the query and returns the result as a GeoImageFrame.

        Returns:
            GeoImageFrame: The result of the query as a GeoImageFrame object.

        Raises:
            TypeError: If geometries are not of type Point.
        """
        with self.engine.connect() as conn:
            result = conn.execute(self.result_set)
            data = [row._asdict() for row in result.fetchall()]

        if not data:
            return GeoImageFrame([])  # Adjust according to your GeoImageFrame handling

        df_data = {col: [] for col in data[0].keys()}

        for d in data:
            for col, value in d.items():
                if isinstance(value, WKBElement):
                    try:
                        point_geom = loads(
                            bytes(value.data)
                        )  # convert WKBElement to Shapely geometry
                        if point_geom.geom_type != "Point":
                            raise TypeError("All geometries must be of type Point.")
                        df_data[col].append(point_geom)
                    except Exception as e:
                        print(f"Failed to process data {value.data}. Error: {e}")
                else:
                    df_data[col].append(value)

        return GeoImageFrame(df_data)

    def get_distinct_values(self, table_name, column_name):
        """
        Gets distinct values from a specific column of a table.

        Args:
            table_name (str): Name of the table to query.
            column_name (str): Name of the column to get distinct values from.

        Returns:
            list: A list of distinct values from the specified column.

        Raises:
            ValueError: If the specified column is not found in the table.
        """
        metadata = MetaData()
        metadata.reflect(bind=self.engine)

        if table_name not in metadata.tables:
            raise ValueError(f"Table '{table_name}' not found.")

        table = metadata.tables[table_name]

        if column_name not in table.columns:
            raise ValueError(
                f"Column '{column_name}' not found in table '{table_name}'"
            )

        column = table.columns[column_name]

        distinct_query = select(column).distinct()
        with self.engine.connect() as conn:
            result = conn.execute(distinct_query)

        distinct_values = [row[0] for row in result.fetchall()]
        return distinct_values

    def upsert_images(self, gif, table_name, conflict="update"):
        """
        Inserts or updates image data in the specified table.

        Args:
            gif (GeoImageFrame): The data frame containing image data.
            table_name (str): The name of the table to upsert into.
            conflict (str, optional): Conflict resolution strategy ("update" or "nothing"). Defaults to "update".

        Raises:
            ValueError: If an invalid conflict resolution type is provided.
        """
        data = gif.to_dict(orient="records")

        meta = MetaData()
        table = Table(table_name, meta, autoload_with=self.engine)

        with self.engine.begin() as conn:
            for record in data:
                record = self._convert_points_to_wkt(record)
                record = self._convert_dicts_to_json(record)
                insert_stmt = insert(table).values(**record)
                if conflict == "update":
                    updates = {
                        key: getattr(insert_stmt.excluded, key)
                        for key in record
                        if key != "image_url"
                    }
                    constraint_name = f"{table.name}_image_url_key"
                    on_conflict_stmt = insert_stmt.on_conflict_do_update(
                        constraint=constraint_name,
                        set_=updates
                    )
                elif conflict == "nothing":
                    on_conflict_stmt = insert_stmt.on_conflict_do_nothing()
                else:
                    raise ValueError(
                        "Invalid conflict resolution type. Choose 'update' or 'nothing'."
                    )

                conn.execute(on_conflict_stmt)

__init__(database_url)

Initializes the Postgres class with the given database URL.

Parameters:

Name Type Description Default
database_url str

The URL of the database to connect to.

required
Source code in landlensdb/handlers/db.py
def __init__(self, database_url):
    """
    Initializes the Postgres class with the given database URL.

    Args:
        database_url (str): The URL of the database to connect to.
    """
    self.DATABASE_URL = database_url
    self.engine = create_engine(self.DATABASE_URL)
    self.result_set = None
    self.selected_table = None

_convert_dicts_to_json(record) staticmethod

Converts dictionary values in a record to JSON strings.

Parameters:

Name Type Description Default
record dict

A dictionary where values may include other dictionaries.

required

Returns:

Name Type Description
dict

The modified record with dict values converted to JSON strings.

Source code in landlensdb/handlers/db.py
@staticmethod
def _convert_dicts_to_json(record):
    """
    Converts dictionary values in a record to JSON strings.

    Args:
        record (dict): A dictionary where values may include other dictionaries.

    Returns:
        dict: The modified record with dict values converted to JSON strings.
    """
    for key, value in record.items():
        if isinstance(value, dict):
            record[key] = json.dumps(value)
    return record

_convert_points_to_wkt(record) staticmethod

Converts Point objects to WKT (Well-Known Text) format.

Parameters:

Name Type Description Default
record dict

A dictionary containing keys and values, where values can be Point objects.

required

Returns:

Name Type Description
dict

The record with Point objects converted to WKT strings.

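For illustration (hypothetical record), the conversion simply substitutes each Point with its WKT string:

>>> from shapely.geometry import Point
>>> record = {"name": "img_001.jpg", "geometry": Point(12, 41)}
>>> Postgres._convert_points_to_wkt(record)
{'name': 'img_001.jpg', 'geometry': 'POINT (12 41)'}
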
Source code in landlensdb/handlers/db.py
@staticmethod
def _convert_points_to_wkt(record):
    """
    Converts Point objects to WKT (Well-Known Text) format.

    Args:
        record (dict): A dictionary containing keys and values, where values can be Point objects.

    Returns:
        dict: The record with Point objects converted to WKT strings.
    """
    for key, value in record.items():
        if isinstance(value, Point):
            record[key] = value.wkt
    return record

all()

Executes the query and returns the result as a GeoImageFrame.

Returns:

Name Type Description
GeoImageFrame

The result of the query as a GeoImageFrame object.

Raises:

Type Description
TypeError

If geometries are not of type Point.

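When the query matches no rows, the source below returns an empty GeoImageFrame rather than raising. A typical call sits at the end of a table()/filter() chain (connection URL and table name are placeholders):

>>> db = Postgres("postgresql://user:password@localhost:5432/landlens")
>>> frame = db.table("images").filter(camera_type="fisheye").all()
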
Source code in landlensdb/handlers/db.py
def all(self):
    """
    Executes the query and returns the result as a GeoImageFrame.

    Returns:
        GeoImageFrame: The result of the query as a GeoImageFrame object.

    Raises:
        TypeError: If geometries are not of type Point.
    """
    with self.engine.connect() as conn:
        result = conn.execute(self.result_set)
        data = [row._asdict() for row in result.fetchall()]

    if not data:
        return GeoImageFrame([])  # Adjust according to your GeoImageFrame handling

    df_data = {col: [] for col in data[0].keys()}

    for d in data:
        for col, value in d.items():
            if isinstance(value, WKBElement):
                try:
                    point_geom = loads(
                        bytes(value.data)
                    )  # convert WKBElement to Shapely geometry
                    if point_geom.geom_type != "Point":
                        raise TypeError("All geometries must be of type Point.")
                    df_data[col].append(point_geom)
                except Exception as e:
                    print(f"Failed to process data {value.data}. Error: {e}")
            else:
                df_data[col].append(value)

    return GeoImageFrame(df_data)

filter(**kwargs)

Applies filters to the selected table based on provided conditions.

Parameters:

Name Type Description Default
**kwargs

Key-value pairs representing filters to apply.

{}

Returns:

Name Type Description
Postgres

Returns self to enable method chaining.

Raises:

Type Description
ValueError

If an unsupported operation or a nonexistent column is specified.

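The double-underscore suffixes map to comparison operators: eq (the default), gt, lt, gte, and lte. A sketch with a hypothetical "images" table:

>>> db = Postgres("postgresql://user:password@localhost:5432/landlens")
>>> query = db.table("images").filter(altitude__gte=100, camera_type="perspective")
>>> frame = query.all()
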
Source code in landlensdb/handlers/db.py
def filter(self, **kwargs):
    """
    Applies filters to the selected table based on provided conditions.

    Args:
        **kwargs: Key-value pairs representing filters to apply.

    Returns:
        Postgres: Returns self to enable method chaining.

    Raises:
        ValueError: If an unsupported operation or a nonexistent column is specified.
    """
    filters = []

    for k, v in kwargs.items():
        if "__" in k:
            field_name, operation = k.split("__", 1)
        else:
            field_name = k
            operation = "eq"

        column = getattr(self.selected_table.columns, field_name, None)
        if column is None:
            raise ValueError(
                f"Column '{field_name}' not found in table '{self.selected_table.name}'"
            )

        if operation == "eq":
            filters.append(column == v)
        elif operation == "gt":
            filters.append(column > v)
        elif operation == "lt":
            filters.append(column < v)
        elif operation == "gte":
            filters.append(column >= v)
        elif operation == "lte":
            filters.append(column <= v)
        else:
            raise ValueError(f"Unsupported operation '{operation}'")

    self.result_set = self.result_set.where(and_(*filters))
    return self

get_distinct_values(table_name, column_name)

Gets distinct values from a specific column of a table.

Parameters:

Name Type Description Default
table_name str

Name of the table to query.

required
column_name str

Name of the column to get distinct values from.

required

Returns:

Name Type Description
list

A list of distinct values from the specified column.

Raises:

Type Description
ValueError

If the specified column is not found in the table.

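A sketch with placeholder table and column names; the return value shown is purely illustrative:

>>> db = Postgres("postgresql://user:password@localhost:5432/landlens")
>>> db.get_distinct_values("images", "camera_type")
['perspective', 'fisheye', '360-degree']
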
Source code in landlensdb/handlers/db.py
def get_distinct_values(self, table_name, column_name):
    """
    Gets distinct values from a specific column of a table.

    Args:
        table_name (str): Name of the table to query.
        column_name (str): Name of the column to get distinct values from.

    Returns:
        list: A list of distinct values from the specified column.

    Raises:
        ValueError: If the specified column is not found in the table.
    """
    metadata = MetaData()
    metadata.reflect(bind=self.engine)

    if table_name not in metadata.tables:
        raise ValueError(f"Table '{table_name}' not found.")

    table = metadata.tables[table_name]

    if column_name not in table.columns:
        raise ValueError(
            f"Column '{column_name}' not found in table '{table_name}'"
        )

    column = table.columns[column_name]

    distinct_query = select(column).distinct()
    with self.engine.connect() as conn:
        result = conn.execute(distinct_query)

    distinct_values = [row[0] for row in result.fetchall()]
    return distinct_values

table(table_name)

Selects a table for performing queries on.

Parameters:

Name Type Description Default
table_name str

Name of the table to select.

required

Returns:

Name Type Description
Postgres

Returns self to enable method chaining.

Source code in landlensdb/handlers/db.py
def table(self, table_name):
    """
    Selects a table for performing queries on.

    Args:
        table_name (str): Name of the table to select.

    Returns:
        Postgres: Returns self to enable method chaining.
    """
    metadata = MetaData()
    self.selected_table = Table(table_name, metadata, autoload_with=self.engine)
    self.result_set = self.selected_table.select()
    return self

upsert_images(gif, table_name, conflict='update')

Inserts or updates image data in the specified table.

Parameters:

Name Type Description Default
gif GeoImageFrame

The data frame containing image data.

required
table_name str

The name of the table to upsert into.

required
conflict str

Conflict resolution strategy ("update" or "nothing"). Defaults to "update".

'update'

Raises:

Type Description
ValueError

If an invalid conflict resolution type is provided.

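Note that the "update" strategy resolves conflicts against a unique constraint named <table>_image_url_key (see the source below), so the target table is assumed to already have a unique image_url column. A sketch with placeholder names:

>>> db = Postgres("postgresql://user:password@localhost:5432/landlens")
>>> frame = Local.load_images("/path/to/images")
>>> db.upsert_images(frame, "images", conflict="update")
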
Source code in landlensdb/handlers/db.py
def upsert_images(self, gif, table_name, conflict="update"):
    """
    Inserts or updates image data in the specified table.

    Args:
        gif (GeoImageFrame): The data frame containing image data.
        table_name (str): The name of the table to upsert into.
        conflict (str, optional): Conflict resolution strategy ("update" or "nothing"). Defaults to "update".

    Raises:
        ValueError: If an invalid conflict resolution type is provided.
    """
    data = gif.to_dict(orient="records")

    meta = MetaData()
    table = Table(table_name, meta, autoload_with=self.engine)

    with self.engine.begin() as conn:
        for record in data:
            record = self._convert_points_to_wkt(record)
            record = self._convert_dicts_to_json(record)
            insert_stmt = insert(table).values(**record)
            if conflict == "update":
                updates = {
                    key: getattr(insert_stmt.excluded, key)
                    for key in record
                    if key != "image_url"
                }
                constraint_name = f"{table.name}_image_url_key"
                on_conflict_stmt = insert_stmt.on_conflict_do_update(
                    constraint=constraint_name,
                    set_=updates
                )
            elif conflict == "nothing":
                on_conflict_stmt = insert_stmt.on_conflict_do_nothing()
            else:
                raise ValueError(
                    "Invalid conflict resolution type. Choose 'update' or 'nothing'."
                )

            conn.execute(on_conflict_stmt)

Image Handler

Local

A class to process EXIF data from images, mainly focusing on extracting geotagging information.

This class includes methods to extract various camera and image properties, such as focal length, camera type, coordinates, and other related data.

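A minimal sketch (the directory path is a placeholder, and the returned GeoImageFrame is assumed to expose the usual GeoDataFrame interface):

>>> frame = Local.load_images("/data/field_photos", create_thumbnails=False)
>>> list(frame.columns)  # name, altitude, camera_type, captured_at, geometry, ...
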
Source code in landlensdb/handlers/image.py
class Local:
    """
    A class to process EXIF data from images, mainly focusing on extracting geotagging information.

    This class includes methods to extract various camera and image properties, such as focal length,
    camera type, coordinates, and other related data.
    """

    @staticmethod
    def _get_camera_model(exif_data):
        """
        Extracts the camera model from the EXIF data.

        Args:
            exif_data (dict): The EXIF data.

        Returns:
            str: Camera model if available, otherwise an empty string.
        """
        return exif_data.get("Model", "").strip()

    @staticmethod
    def _infer_camera_type(focal_length, camera_model=None):
        """
        Infers the camera type based on the focal length and camera model.

        Args:
            focal_length (float): The focal length of the camera.
            camera_model (str): The camera model.

        Returns:
            str: One of "fisheye", "perspective", or "360-degree".
        """
        if not focal_length and not camera_model:
            return np.nan

        known_360_cameras = KNOWN_CAMERAS.get("360 Models", [])

        if camera_model in known_360_cameras:
            return "360-degree"

        # Without a usable focal length, the type cannot be classified further
        if not focal_length:
            return np.nan

        # Further classification based on focal length
        if focal_length < 1.5:
            return "fisheye"
        else:
            return "perspective"

    @staticmethod
    def get_exif_data(img):
        """
        Retrieves the EXIF data from an image.

        Args:
            img (PIL.Image.Image): The image to extract EXIF data from.

        Returns:
            dict: A dictionary containing the EXIF data.
        """
        exif_data = {}
        info = img._getexif()
        if info:
            for tag, value in info.items():
                tag_name = TAGS.get(tag, tag)
                if tag_name == "GPSInfo":
                    gps_info = {}
                    for t in value:
                        sub_tag_name = GPSTAGS.get(t, t)
                        gps_info[sub_tag_name] = value[t]
                    exif_data[tag_name] = gps_info
                else:
                    exif_data[tag_name] = value
        return exif_data

    @staticmethod
    def create_thumbnail(image_path, size=(256, 256)):
        """
        Creates a thumbnail for the given image while preserving aspect ratio.

        Args:
            image_path (str): Path to the original image
            size (tuple): Desired thumbnail size as (width, height). Default is (256, 256)

        Returns:
            str: Path to the created thumbnail

        Raises:
            FileNotFoundError: If the image file doesn't exist
            ValueError: If the image cannot be opened or processed
        """
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path}")

        # Create thumbnails directory in the same directory as the original image
        original_dir = os.path.dirname(image_path)
        thumbnail_dir = os.path.join(original_dir, "thumbnails")
        os.makedirs(thumbnail_dir, exist_ok=True)

        # Generate thumbnail filename
        original_filename = os.path.basename(image_path)
        thumbnail_filename = f"thumb_{original_filename}"
        thumbnail_path = os.path.join(thumbnail_dir, thumbnail_filename)

        try:
            with Image.open(image_path) as img:
                # Convert to RGB if necessary
                if img.mode in ('RGBA', 'LA'):
                    img = img.convert('RGB')

                # Calculate new dimensions preserving aspect ratio
                img.thumbnail(size, Image.Resampling.LANCZOS)

                # Save thumbnail
                img.save(thumbnail_path, "JPEG", quality=85)
                return thumbnail_path

        except Exception as e:
            raise ValueError(f"Error creating thumbnail for {image_path}: {str(e)}")

    @staticmethod
    def _to_decimal(coord_tuple):
        """
        Converts coordinates from degrees, minutes, and seconds to decimal.

        Args:
            coord_tuple (tuple or str): The coordinate tuple to convert.

        Returns:
            float: Decimal representation of the coordinates.
        """
        if isinstance(coord_tuple, tuple) and len(coord_tuple) == 3:
            return (
                float(coord_tuple[0])
                + float(coord_tuple[1]) / 60
                + float(coord_tuple[2]) / 3600
            )
        elif isinstance(coord_tuple, str) and "/" in coord_tuple:
            num, denom = coord_tuple.split("/")
            if float(denom) != 0:
                return float(num) / float(denom)
            else:
                return None
        return coord_tuple

    @classmethod
    def _get_geotagging(cls, exif):
        """
        Extracts geotagging information from EXIF metadata.

        Args:
            exif (dict): The EXIF metadata.

        Returns:
            dict: A dictionary containing the geotagging information.

        Raises:
            ValueError: If no EXIF metadata found or no GPSInfo tag found.
        """
        if not exif:
            raise ValueError("No EXIF metadata found")

        idx = None
        for tag, label in TAGS.items():
            if label == "GPSInfo":
                idx = tag
                break

        if idx is None:
            raise ValueError("No GPSInfo tag found in TAGS.")

        gps_data = exif.get("GPSInfo", exif.get(idx, None))
        if not gps_data:
            raise ValueError("No EXIF geotagging found")

        geotagging = {}
        for key, val in GPSTAGS.items():
            data_value = gps_data.get(key) or gps_data.get(val)
            if data_value:
                geotagging[val] = data_value

        return geotagging

    @classmethod
    def _get_image_altitude(cls, geotags):
        """
        Retrieves the altitude information from geotags.

        Args:
            geotags (dict): The geotags information.

        Returns:
            float: Altitude information if available, otherwise None.
        """
        if "GPSAltitude" in geotags:
            return geotags["GPSAltitude"]
        return None

    @classmethod
    def _get_image_direction(cls, geotags):
        """
        Retrieves the image direction information from geotags.

        Args:
            geotags (dict): The geotags information.

        Returns:
            float: Image direction information if available, otherwise None.
        """
        if "GPSImgDirection" in geotags:
            return geotags["GPSImgDirection"]
        return None

    @classmethod
    def _get_coordinates(cls, geotags):
        """
        Retrieves the latitude and longitude coordinates from geotags.

        Args:
            geotags (dict): The geotags information.

        Returns:
            tuple: Latitude and longitude coordinates.

        Raises:
            ValueError: If the coordinates are invalid.
        """
        lat = cls._to_decimal(geotags["GPSLatitude"])
        lon = cls._to_decimal(geotags["GPSLongitude"])

        if geotags["GPSLatitudeRef"] == "S":
            lat = -lat

        if geotags["GPSLongitudeRef"] == "W":
            lon = -lon

        return lat, lon

    @staticmethod
    def _get_focal_length(exif_data):
        """
        Retrieves the focal length from the EXIF data.

        Args:
            exif_data (dict): The EXIF data.

        Returns:
            float: Focal length if available, otherwise None.
        """
        focal_length = exif_data.get("FocalLength", None)

        if focal_length is None:
            return None

        if isinstance(focal_length, numbers.Number):
            return float(focal_length)

        elif (
            isinstance(focal_length, tuple)
            and len(focal_length) == 2
            and focal_length[1] != 0
        ):
            return float(focal_length[0]) / focal_length[1]

        elif (
            hasattr(focal_length, "num")
            and hasattr(focal_length, "den")
            and focal_length.den != 0
        ):
            return float(focal_length.num) / focal_length.den

        else:
            return None

    @classmethod
    def load_images(cls, directory, additional_columns=None, create_thumbnails=True, thumbnail_size=(256, 256)):
        """
        Loads images from a given directory, extracts relevant information, and returns it in a GeoImageFrame.

        Args:
            directory (str): Path to the directory containing images.
            additional_columns (list, optional): List of additional column names or tuples containing column name and EXIF tag.
            create_thumbnails (bool): Whether to create thumbnails for the images. Defaults to True.
            thumbnail_size (tuple): Size for generated thumbnails as (width, height). Defaults to (256, 256).

        Returns:
            GeoImageFrame: Frame containing the data extracted from the images.

        Raises:
            ValueError: If no valid images are found in the directory.

        Examples:
            >>> directory = "/path/to/images"
            >>> image_data = Local.load_images(directory, create_thumbnails=True)
        """
        tf = TimezoneFinder()
        data = []
        valid_image_count = 0
        for root, dirs, files in os.walk(directory):
            # Skip thumbnails directory
            if "thumbnails" in dirs:
                dirs.remove("thumbnails")
            for file in files:
                if file.lower().endswith((".png", ".jpg", ".jpeg")):
                    valid_image_count += 1
                    filepath = os.path.join(root, file)
                    img = Image.open(filepath)
                    exif_data = cls.get_exif_data(img)
                    try:
                        geotags = cls._get_geotagging(exif_data)
                        lat, lon = cls._get_coordinates(geotags)
                        if lat is None or lon is None:
                            raise ValueError(
                                f"Invalid coordinates for {filepath}: Latitude: {lat}, Longitude: {lon}"
                            )
                        geometry = Point(lon, lat)
                    except Exception as e:
                        warnings.warn(
                            f"Error extracting geotags for {filepath}: {str(e)}. Skipped."
                        )
                        continue
                    focal_length = cls._get_focal_length(exif_data)
                    camera_model = cls._get_camera_model(exif_data)
                    camera_type = cls._infer_camera_type(focal_length, camera_model)

                    k1 = None
                    k2 = None
                    if None in [focal_length, k1, k2]:
                        camera_parameters = np.nan
                    else:
                        camera_parameters = ",".join(
                            [str(focal_length), str(k1), str(k2)]
                        )

                    captured_at_str = exif_data.get("DateTime", None)
                    if captured_at_str and geometry:
                        captured_at_naive = datetime.strptime(
                            captured_at_str, "%Y:%m:%d %H:%M:%S"
                        )
                        tz_name = tf.timezone_at(lat=lat, lng=lon)
                        if tz_name:
                            local_tz = pytz.timezone(tz_name)
                            captured_at = local_tz.localize(
                                captured_at_naive
                            ).isoformat()
                        else:
                            captured_at = captured_at_naive.isoformat()
                    else:
                        captured_at = None

                    altitude = np.float32(cls._get_image_altitude(geotags))
                    compass_angle = np.float32(cls._get_image_direction(geotags))
                    exif_orientation = np.float32(exif_data.get("Orientation", None))

                    # Generate thumbnail if requested
                    thumb_url = None
                    if create_thumbnails:
                        try:
                            # Check if thumbnail already exists
                            thumbnail_dir = os.path.join(os.path.dirname(filepath), "thumbnails")
                            thumb_filename = f"thumb_{os.path.basename(filepath)}"
                            thumb_path = os.path.join(thumbnail_dir, thumb_filename)

                            if os.path.exists(thumb_path):
                                thumb_url = thumb_path
                            else:
                                thumb_url = cls.create_thumbnail(filepath, size=thumbnail_size)
                        except Exception as e:
                            warnings.warn(f"Error creating thumbnail for {filepath}: {str(e)}")

                    image_data = {
                        "name": filepath.split("/")[-1],
                        "altitude": altitude,
                        "camera_type": camera_type,
                        "camera_parameters": camera_parameters,
                        "captured_at": captured_at,
                        "compass_angle": compass_angle,
                        "exif_orientation": exif_orientation,
                        "image_url": filepath,
                        "thumb_url": thumb_url,
                        "geometry": geometry,
                    }

                    for column_info in additional_columns or []:
                        if isinstance(column_info, str):
                            image_data[column_info] = np.nan
                        elif isinstance(column_info, tuple):
                            col_name, exif_tag = column_info
                            image_data[col_name] = exif_data.get(exif_tag, np.nan)

                    data.append(image_data)

        if valid_image_count == 0:
            raise ValueError("The directory does not contain any valid images")

        gif = GeoImageFrame(data, geometry="geometry")
        gif.set_crs(epsg=4326, inplace=True)
        return gif

_get_camera_model(exif_data) staticmethod

Extracts the camera model from the EXIF data.

Parameters:

Name Type Description Default
exif_data dict

The EXIF data.

required

Returns:

Name Type Description
str

Camera model if available, otherwise an empty string.

Source code in landlensdb/handlers/image.py
@staticmethod
def _get_camera_model(exif_data):
    """
    Extracts the camera model from the EXIF data.

    Args:
        exif_data (dict): The EXIF data.

    Returns:
        str: Camera model if available, otherwise an empty string.
    """
    return exif_data.get("Model", "").strip()

_get_coordinates(geotags) classmethod

Retrieves the latitude and longitude coordinates from geotags.

Parameters:

Name Type Description Default
geotags dict

The geotags information.

required

Returns:

Name Type Description
tuple

Latitude and longitude coordinates.

Raises:

Type Description
ValueError

If the coordinates are invalid.

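For illustration (hypothetical values), a southern latitude reference flips the sign of the decimal result:

>>> geotags = {
...     "GPSLatitude": (33.0, 30.0, 0.0), "GPSLatitudeRef": "S",
...     "GPSLongitude": (151.0, 15.0, 0.0), "GPSLongitudeRef": "E",
... }
>>> Local._get_coordinates(geotags)
(-33.5, 151.25)
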
Source code in landlensdb/handlers/image.py
@classmethod
def _get_coordinates(cls, geotags):
    """
    Retrieves the latitude and longitude coordinates from geotags.

    Args:
        geotags (dict): The geotags information.

    Returns:
        tuple: Latitude and longitude coordinates.

    Raises:
        ValueError: If the coordinates are invalid.
    """
    lat = cls._to_decimal(geotags["GPSLatitude"])
    lon = cls._to_decimal(geotags["GPSLongitude"])

    if geotags["GPSLatitudeRef"] == "S":
        lat = -lat

    if geotags["GPSLongitudeRef"] == "W":
        lon = -lon

    return lat, lon

_get_focal_length(exif_data) staticmethod

Retrieves the focal length from the EXIF data.

Parameters:

Name Type Description Default
exif_data dict

The EXIF data.

required

Returns:

Name Type Description
float

Focal length if available, otherwise None.

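The helper accepts plain numbers, (numerator, denominator) tuples, and rational objects exposing num/den attributes; for example:

>>> Local._get_focal_length({"FocalLength": 24})
24.0
>>> Local._get_focal_length({"FocalLength": (35, 10)})
3.5
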
Source code in landlensdb/handlers/image.py
@staticmethod
def _get_focal_length(exif_data):
    """
    Retrieves the focal length from the EXIF data.

    Args:
        exif_data (dict): The EXIF data.

    Returns:
        float: Focal length if available, otherwise None.
    """
    focal_length = exif_data.get("FocalLength", None)

    if focal_length is None:
        return None

    if isinstance(focal_length, numbers.Number):
        return float(focal_length)

    elif (
        isinstance(focal_length, tuple)
        and len(focal_length) == 2
        and focal_length[1] != 0
    ):
        return float(focal_length[0]) / focal_length[1]

    elif (
        hasattr(focal_length, "num")
        and hasattr(focal_length, "den")
        and focal_length.den != 0
    ):
        return float(focal_length.num) / focal_length.den

    else:
        return None

_get_geotagging(exif) classmethod

Extracts geotagging information from EXIF metadata.

Parameters:

Name Type Description Default
exif dict

The EXIF metadata.

required

Returns:

Name Type Description
dict

A dictionary containing the geotagging information.

Raises:

Type Description
ValueError

If no EXIF metadata found or no GPSInfo tag found.

Source code in landlensdb/handlers/image.py
@classmethod
def _get_geotagging(cls, exif):
    """
    Extracts geotagging information from EXIF metadata.

    Args:
        exif (dict): The EXIF metadata.

    Returns:
        dict: A dictionary containing the geotagging information.

    Raises:
        ValueError: If no EXIF metadata found or no GPSInfo tag found.
    """
    if not exif:
        raise ValueError("No EXIF metadata found")

    idx = None
    for tag, label in TAGS.items():
        if label == "GPSInfo":
            idx = tag
            break

    if idx is None:
        raise ValueError("No GPSInfo tag found in TAGS.")

    gps_data = exif.get("GPSInfo", exif.get(idx, None))
    if not gps_data:
        raise ValueError("No EXIF geotagging found")

    geotagging = {}
    for key, val in GPSTAGS.items():
        data_value = gps_data.get(key) or gps_data.get(val)
        if data_value:
            geotagging[val] = data_value

    return geotagging

_get_image_altitude(geotags) classmethod

Retrieves the altitude information from geotags.

Parameters:

Name Type Description Default
geotags dict

The geotags information.

required

Returns:

Name Type Description
float

Altitude information if available, otherwise None.

Source code in landlensdb/handlers/image.py
@classmethod
def _get_image_altitude(cls, geotags):
    """
    Retrieves the altitude information from geotags.

    Args:
        geotags (dict): The geotags information.

    Returns:
        float: Altitude information if available, otherwise None.
    """
    if "GPSAltitude" in geotags:
        return geotags["GPSAltitude"]
    return None

_get_image_direction(geotags) classmethod

Retrieves the image direction information from geotags.

Parameters:

Name Type Description Default
geotags dict

The geotags information.

required

Returns:

Name Type Description
float

Image direction information if available, otherwise None.

Source code in landlensdb/handlers/image.py
@classmethod
def _get_image_direction(cls, geotags):
    """
    Retrieves the image direction information from geotags.

    Args:
        geotags (dict): The geotags information.

    Returns:
        float: Image direction information if available, otherwise None.
    """
    if "GPSImgDirection" in geotags:
        return geotags["GPSImgDirection"]
    return None

_infer_camera_type(focal_length, camera_model=None) staticmethod

Infers the camera type based on the focal length and camera model.

Parameters:

Name Type Description Default
focal_length float

The focal length of the camera.

required
camera_model str

The camera model.

None

Returns:

Name Type Description
str

One of "fisheye", "perspective", or "360-degree".

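As the source below shows, models not listed as 360-degree cameras are split on a focal-length threshold of 1.5:

>>> Local._infer_camera_type(0.9)
'fisheye'
>>> Local._infer_camera_type(4.2)
'perspective'
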
Source code in landlensdb/handlers/image.py
@staticmethod
def _infer_camera_type(focal_length, camera_model=None):
    """
    Infers the camera type based on the focal length and camera model.

    Args:
        focal_length (float): The focal length of the camera.
        camera_model (str): The camera model.

    Returns:
        str: One of "fisheye", "perspective", or "360-degree".
    """
    if not focal_length and not camera_model:
        return np.nan

    known_360_cameras = KNOWN_CAMERAS.get("360 Models", [])

    if camera_model in known_360_cameras:
        return "360-degree"

    # Without a usable focal length, the type cannot be classified further
    if not focal_length:
        return np.nan

    # Further classification based on focal length
    if focal_length < 1.5:
        return "fisheye"
    else:
        return "perspective"

_to_decimal(coord_tuple) staticmethod

Converts coordinates from degrees, minutes, and seconds to decimal.

Parameters:

Name Type Description Default
coord_tuple tuple or str

The coordinate tuple to convert.

required

Returns:

Name Type Description
float

Decimal representation of the coordinates.

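A degrees/minutes/seconds tuple is converted as degrees + minutes/60 + seconds/3600, and a "num/denom" string is evaluated as a ratio (illustrative values):

>>> Local._to_decimal((12.0, 15.0, 0.0))
12.25
>>> Local._to_decimal("57/2")
28.5
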
Source code in landlensdb/handlers/image.py
@staticmethod
def _to_decimal(coord_tuple):
    """
    Converts coordinates from degrees, minutes, and seconds to decimal.

    Args:
        coord_tuple (tuple or str): The coordinate tuple to convert.

    Returns:
        float: Decimal representation of the coordinates.
    """
    if isinstance(coord_tuple, tuple) and len(coord_tuple) == 3:
        return (
            float(coord_tuple[0])
            + float(coord_tuple[1]) / 60
            + float(coord_tuple[2]) / 3600
        )
    elif isinstance(coord_tuple, str) and "/" in coord_tuple:
        num, denom = coord_tuple.split("/")
        if float(denom) != 0:
            return float(num) / float(denom)
        else:
            return None
    return coord_tuple

create_thumbnail(image_path, size=(256, 256)) staticmethod

Creates a thumbnail for the given image while preserving aspect ratio.

Parameters:

Name Type Description Default
image_path str

Path to the original image

required
size tuple

Desired thumbnail size as (width, height). Default is (256, 256)

(256, 256)

Returns:

Name Type Description
str

Path to the created thumbnail

Raises:

Type Description
FileNotFoundError

If the image file doesn't exist

ValueError

If the image cannot be opened or processed

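Thumbnails are written to a thumbnails/ subdirectory next to the original image and prefixed with thumb_; the path below is a placeholder:

>>> Local.create_thumbnail("/data/field_photos/img_001.jpg", size=(128, 128))
'/data/field_photos/thumbnails/thumb_img_001.jpg'
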
Source code in landlensdb/handlers/image.py
@staticmethod
def create_thumbnail(image_path, size=(256, 256)):
    """
    Creates a thumbnail for the given image while preserving aspect ratio.

    Args:
        image_path (str): Path to the original image
        size (tuple): Desired thumbnail size as (width, height). Default is (256, 256)

    Returns:
        str: Path to the created thumbnail

    Raises:
        FileNotFoundError: If the image file doesn't exist
        ValueError: If the image cannot be opened or processed
    """
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    # Create thumbnails directory in the same directory as the original image
    original_dir = os.path.dirname(image_path)
    thumbnail_dir = os.path.join(original_dir, "thumbnails")
    os.makedirs(thumbnail_dir, exist_ok=True)

    # Generate thumbnail filename
    original_filename = os.path.basename(image_path)
    thumbnail_filename = f"thumb_{original_filename}"
    thumbnail_path = os.path.join(thumbnail_dir, thumbnail_filename)

    try:
        with Image.open(image_path) as img:
            # Convert to RGB if necessary
            if img.mode in ('RGBA', 'LA'):
                img = img.convert('RGB')

            # Calculate new dimensions preserving aspect ratio
            img.thumbnail(size, Image.Resampling.LANCZOS)

            # Save thumbnail
            img.save(thumbnail_path, "JPEG", quality=85)
            return thumbnail_path

    except Exception as e:
        raise ValueError(f"Error creating thumbnail for {image_path}: {str(e)}")

get_exif_data(img) staticmethod

Retrieves the EXIF data from an image.

Parameters:

Name Type Description Default
img Image

The image to extract EXIF data from.

required

Returns:

Name Type Description
dict

A dictionary containing the EXIF data.

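A sketch with a placeholder path; GPS tags are grouped under the GPSInfo key of the returned dictionary:

>>> from PIL import Image
>>> img = Image.open("/data/field_photos/img_001.jpg")
>>> exif = Local.get_exif_data(img)
>>> model = exif.get("Model")
>>> gps = exif.get("GPSInfo", {})
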
Source code in landlensdb/handlers/image.py
@staticmethod
def get_exif_data(img):
    """
    Retrieves the EXIF data from an image.

    Args:
        img (PIL.Image.Image): The image to extract EXIF data from.

    Returns:
        dict: A dictionary containing the EXIF data.
    """
    exif_data = {}
    info = img._getexif()
    if info:
        for tag, value in info.items():
            tag_name = TAGS.get(tag, tag)
            if tag_name == "GPSInfo":
                gps_info = {}
                for t in value:
                    sub_tag_name = GPSTAGS.get(t, t)
                    gps_info[sub_tag_name] = value[t]
                exif_data[tag_name] = gps_info
            else:
                exif_data[tag_name] = value
    return exif_data

load_images(directory, additional_columns=None, create_thumbnails=True, thumbnail_size=(256, 256)) classmethod

Loads images from a given directory, extracts relevant information, and returns it in a GeoImageFrame.

Parameters:

Name Type Description Default
directory str

Path to the directory containing images.

required
additional_columns list

List of additional column names or tuples containing column name and EXIF tag.

None
create_thumbnails bool

Whether to create thumbnails for the images. Defaults to True.

True
thumbnail_size tuple

Size for generated thumbnails as (width, height). Defaults to (256, 256).

(256, 256)

Returns:

Name Type Description
GeoImageFrame

Frame containing the data extracted from the images.

Raises:

Type Description
ValueError

If no valid images are found in the directory.

Examples:

>>> directory = "/path/to/images"
>>> image_data = Local.load_images(directory, create_thumbnails=True)
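
Additional columns can be filled straight from EXIF by passing (column_name, exif_tag) tuples, while bare names are added as empty (NaN) columns. A sketch assuming the standard ISOSpeedRatings EXIF tag, with a placeholder path:

>>> frame = Local.load_images(
...     "/path/to/images",
...     additional_columns=[("iso", "ISOSpeedRatings"), "notes"],
...     thumbnail_size=(128, 128),
... )
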
Source code in landlensdb/handlers/image.py
@classmethod
def load_images(cls, directory, additional_columns=None, create_thumbnails=True, thumbnail_size=(256, 256)):
    """
    Loads images from a given directory, extracts relevant information, and returns it in a GeoImageFrame.

    Args:
        directory (str): Path to the directory containing images.
        additional_columns (list, optional): List of additional column names or tuples containing column name and EXIF tag.
        create_thumbnails (bool): Whether to create thumbnails for the images. Defaults to True.
        thumbnail_size (tuple): Size for generated thumbnails as (width, height). Defaults to (256, 256).

    Returns:
        GeoImageFrame: Frame containing the data extracted from the images.

    Raises:
        ValueError: If no valid images are found in the directory.

    Examples:
        >>> directory = "/path/to/images"
        >>> image_data = Local.load_images(directory, create_thumbnails=True)
    """
    tf = TimezoneFinder()
    data = []
    valid_image_count = 0
    for root, dirs, files in os.walk(directory):
        # Skip thumbnails directory
        if "thumbnails" in dirs:
            dirs.remove("thumbnails")
        for file in files:
            if file.lower().endswith((".png", ".jpg", ".jpeg")):
                valid_image_count += 1
                filepath = os.path.join(root, file)
                img = Image.open(filepath)
                exif_data = cls.get_exif_data(img)
                try:
                    geotags = cls._get_geotagging(exif_data)
                    lat, lon = cls._get_coordinates(geotags)
                    if lat is None or lon is None:
                        raise ValueError(
                            f"Invalid coordinates for {filepath}: Latitude: {lat}, Longitude: {lon}"
                        )
                    geometry = Point(lon, lat)
                except Exception as e:
                    warnings.warn(
                        f"Error extracting geotags for {filepath}: {str(e)}. Skipped."
                    )
                    continue
                focal_length = cls._get_focal_length(exif_data)
                camera_model = cls._get_camera_model(exif_data)
                camera_type = cls._infer_camera_type(focal_length, camera_model)

                k1 = None
                k2 = None
                if None in [focal_length, k1, k2]:
                    camera_parameters = np.nan
                else:
                    camera_parameters = ",".join(
                        [str(focal_length), str(k1), str(k2)]
                    )

                captured_at_str = exif_data.get("DateTime", None)
                if captured_at_str and geometry:
                    captured_at_naive = datetime.strptime(
                        captured_at_str, "%Y:%m:%d %H:%M:%S"
                    )
                    tz_name = tf.timezone_at(lat=lat, lng=lon)
                    if tz_name:
                        local_tz = pytz.timezone(tz_name)
                        captured_at = local_tz.localize(
                            captured_at_naive
                        ).isoformat()
                    else:
                        captured_at = captured_at_naive.isoformat()
                else:
                    captured_at = None

                altitude = np.float32(cls._get_image_altitude(geotags))
                compass_angle = np.float32(cls._get_image_direction(geotags))
                exif_orientation = np.float32(exif_data.get("Orientation", None))

                # Generate thumbnail if requested
                thumb_url = None
                if create_thumbnails:
                    try:
                        # Check if thumbnail already exists
                        thumbnail_dir = os.path.join(os.path.dirname(filepath), "thumbnails")
                        thumb_filename = f"thumb_{os.path.basename(filepath)}"
                        thumb_path = os.path.join(thumbnail_dir, thumb_filename)

                        if os.path.exists(thumb_path):
                            thumb_url = thumb_path
                        else:
                            thumb_url = cls.create_thumbnail(filepath, size=thumbnail_size)
                    except Exception as e:
                        warnings.warn(f"Error creating thumbnail for {filepath}: {str(e)}")

                image_data = {
                    "name": filepath.split("/")[-1],
                    "altitude": altitude,
                    "camera_type": camera_type,
                    "camera_parameters": camera_parameters,
                    "captured_at": captured_at,
                    "compass_angle": compass_angle,
                    "exif_orientation": exif_orientation,
                    "image_url": filepath,
                    "thumb_url": thumb_url,
                    "geometry": geometry,
                }

                for column_info in additional_columns or []:
                    if isinstance(column_info, str):
                        image_data[column_info] = np.nan
                    elif isinstance(column_info, tuple):
                        col_name, exif_tag = column_info
                        image_data[col_name] = exif_data.get(exif_tag, np.nan)

                data.append(image_data)

    if valid_image_count == 0:
        raise ValueError("The directory does not contain any valid images")

    gif = GeoImageFrame(data, geometry="geometry")
    gif.set_crs(epsg=4326, inplace=True)
    return gif