2080 lines
66 KiB
Cython
2080 lines
66 KiB
Cython
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
|
|
|
|
"""IO support for OGR vector data sources
|
|
"""
|
|
|
|
|
|
import contextlib
|
|
import datetime
|
|
import locale
|
|
import logging
|
|
import math
|
|
import os
|
|
import warnings
|
|
|
|
from libc.stdint cimport uint8_t, uintptr_t
|
|
from libc.stdlib cimport malloc, free
|
|
from libc.string cimport strlen
|
|
from libc.math cimport isnan
|
|
|
|
cimport cython
|
|
import numpy as np
|
|
cimport numpy as np
|
|
|
|
from pyogrio._ogr cimport *
|
|
from pyogrio._err cimport *
|
|
from pyogrio._err import CPLE_BaseError, CPLE_NotSupportedError, NullPointerError
|
|
from pyogrio._geometry cimport get_geometry_type, get_geometry_type_code
|
|
from pyogrio.errors import CRSError, DataSourceError, DataLayerError, GeometryError, FieldError, FeatureError
|
|
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
# Mapping of OGR integer field types to Python field type names
|
|
# (index in array is the integer field type)
|
|
FIELD_TYPES = [
|
|
'int32', # OFTInteger, Simple 32bit integer
|
|
None, # OFTIntegerList, List of 32bit integers, not supported
|
|
'float64', # OFTReal, Double Precision floating point
|
|
None, # OFTRealList, List of doubles, not supported
|
|
'object', # OFTString, String of UTF-8 chars
|
|
None, # OFTStringList, Array of strings, not supported
|
|
None, # OFTWideString, deprecated, not supported
|
|
None, # OFTWideStringList, deprecated, not supported
|
|
'object', # OFTBinary, Raw Binary data
|
|
'datetime64[D]', # OFTDate, Date
|
|
None, # OFTTime, Time, NOTE: not directly supported in numpy
|
|
'datetime64[ms]',# OFTDateTime, Date and Time
|
|
'int64', # OFTInteger64, Single 64bit integer
|
|
None # OFTInteger64List, List of 64bit integers, not supported
|
|
]
|
|
|
|
FIELD_SUBTYPES = {
|
|
OFSTNone: None, # No subtype
|
|
OFSTBoolean: "bool", # Boolean integer
|
|
OFSTInt16: "int16", # Signed 16-bit integer
|
|
OFSTFloat32: "float32", # Single precision (32 bit) floating point
|
|
}
|
|
|
|
# Mapping of numpy ndarray dtypes to (field type, subtype)
|
|
DTYPE_OGR_FIELD_TYPES = {
|
|
'int8': (OFTInteger, OFSTInt16),
|
|
'int16': (OFTInteger, OFSTInt16),
|
|
'int32': (OFTInteger, OFSTNone),
|
|
'int': (OFTInteger64, OFSTNone),
|
|
'int64': (OFTInteger64, OFSTNone),
|
|
# unsigned ints have to be converted to ints; these are converted
|
|
# to the next largest integer size
|
|
'uint8': (OFTInteger, OFSTInt16),
|
|
'uint16': (OFTInteger, OFSTNone),
|
|
'uint32': (OFTInteger64, OFSTNone),
|
|
# TODO: these might get truncated, check maximum value and raise error
|
|
'uint': (OFTInteger64, OFSTNone),
|
|
'uint64': (OFTInteger64, OFSTNone),
|
|
|
|
# bool is handled as integer with boolean subtype
|
|
'bool': (OFTInteger, OFSTBoolean),
|
|
|
|
'float32': (OFTReal,OFSTFloat32),
|
|
'float': (OFTReal, OFSTNone),
|
|
'float64': (OFTReal, OFSTNone),
|
|
|
|
'datetime64[D]': (OFTDate, OFSTNone),
|
|
'datetime64': (OFTDateTime, OFSTNone),
|
|
}
|
|
|
|
|
|
cdef int start_transaction(OGRDataSourceH ogr_dataset, int force) except 1:
|
|
cdef int err = GDALDatasetStartTransaction(ogr_dataset, force)
|
|
if err == OGRERR_FAILURE:
|
|
raise DataSourceError("Failed to start transaction")
|
|
|
|
return 0
|
|
|
|
|
|
cdef int commit_transaction(OGRDataSourceH ogr_dataset) except 1:
|
|
cdef int err = GDALDatasetCommitTransaction(ogr_dataset)
|
|
if err == OGRERR_FAILURE:
|
|
raise DataSourceError("Failed to commit transaction")
|
|
|
|
return 0
|
|
|
|
|
|
# Not currently used; uncomment when used
|
|
# cdef int rollback_transaction(OGRDataSourceH ogr_dataset) except 1:
|
|
# cdef int err = GDALDatasetRollbackTransaction(ogr_dataset)
|
|
# if err == OGRERR_FAILURE:
|
|
# raise DataSourceError("Failed to rollback transaction")
|
|
|
|
# return 0
|
|
|
|
|
|
cdef char** dict_to_options(object values):
|
|
"""Convert a python dictionary into name / value pairs (stored in a char**)
|
|
|
|
Parameters
|
|
----------
|
|
values: dict
|
|
all keys and values must be strings
|
|
|
|
Returns
|
|
-------
|
|
char**
|
|
"""
|
|
cdef char **options = NULL
|
|
|
|
if values is None:
|
|
return NULL
|
|
|
|
for k, v in values.items():
|
|
k = k.encode('UTF-8')
|
|
v = v.encode('UTF-8')
|
|
options = CSLAddNameValue(options, <const char *>k, <const char *>v)
|
|
|
|
return options
|
|
|
|
|
|
cdef void* ogr_open(const char* path_c, int mode, char** options) except NULL:
|
|
cdef void* ogr_dataset = NULL
|
|
|
|
# Force linear approximations in all cases
|
|
OGRSetNonLinearGeometriesEnabledFlag(0)
|
|
|
|
flags = GDAL_OF_VECTOR | GDAL_OF_VERBOSE_ERROR
|
|
if mode == 1:
|
|
flags |= GDAL_OF_UPDATE
|
|
else:
|
|
flags |= GDAL_OF_READONLY
|
|
|
|
|
|
try:
|
|
# WARNING: GDAL logs warnings about invalid open options to stderr
|
|
# instead of raising an error
|
|
ogr_dataset = exc_wrap_pointer(
|
|
GDALOpenEx(path_c, flags, NULL, <const char *const *>options, NULL)
|
|
)
|
|
|
|
return ogr_dataset
|
|
|
|
except NullPointerError:
|
|
raise DataSourceError(
|
|
"Failed to open dataset (mode={}): {}".format(mode, path_c.decode("utf-8"))
|
|
) from None
|
|
|
|
except CPLE_BaseError as exc:
|
|
if str(exc).endswith("not recognized as a supported file format."):
|
|
raise DataSourceError(
|
|
f"{str(exc)} It might help to specify the correct driver explicitly by "
|
|
"prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'."
|
|
) from None
|
|
raise DataSourceError(str(exc)) from None
|
|
|
|
|
|
cdef OGRLayerH get_ogr_layer(GDALDatasetH ogr_dataset, layer) except NULL:
|
|
"""Open OGR layer by index or name.
|
|
|
|
Parameters
|
|
----------
|
|
ogr_dataset : pointer to open OGR dataset
|
|
layer : str or int
|
|
name or index of layer
|
|
|
|
Returns
|
|
-------
|
|
pointer to OGR layer
|
|
"""
|
|
cdef OGRLayerH ogr_layer = NULL
|
|
|
|
try:
|
|
if isinstance(layer, str):
|
|
name_b = layer.encode('utf-8')
|
|
name_c = name_b
|
|
ogr_layer = exc_wrap_pointer(GDALDatasetGetLayerByName(ogr_dataset, name_c))
|
|
|
|
elif isinstance(layer, int):
|
|
ogr_layer = exc_wrap_pointer(GDALDatasetGetLayer(ogr_dataset, layer))
|
|
|
|
# GDAL does not always raise exception messages in this case
|
|
except NullPointerError:
|
|
raise DataLayerError(f"Layer '{layer}' could not be opened") from None
|
|
|
|
except CPLE_BaseError as exc:
|
|
raise DataLayerError(str(exc))
|
|
|
|
# if the driver is OSM, we need to execute SQL to set the layer to read in
|
|
# order to read it properly
|
|
if get_driver(ogr_dataset) == "OSM":
|
|
# Note: this returns NULL and does not need to be freed via
|
|
# GDALDatasetReleaseResultSet()
|
|
layer_name = get_string(OGR_L_GetName(ogr_layer))
|
|
sql_b = f"SET interest_layers = {layer_name}".encode('utf-8')
|
|
sql_c = sql_b
|
|
|
|
GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, NULL)
|
|
|
|
return ogr_layer
|
|
|
|
|
|
cdef OGRLayerH execute_sql(GDALDatasetH ogr_dataset, str sql, str sql_dialect=None) except NULL:
|
|
"""Execute an SQL statement on a dataset.
|
|
|
|
Parameters
|
|
----------
|
|
ogr_dataset : pointer to open OGR dataset
|
|
sql : str
|
|
The sql statement to execute
|
|
sql_dialect : str, optional (default: None)
|
|
The sql dialect the sql statement is written in
|
|
|
|
Returns
|
|
-------
|
|
pointer to OGR layer
|
|
"""
|
|
|
|
try:
|
|
sql_b = sql.encode('utf-8')
|
|
sql_c = sql_b
|
|
if sql_dialect is None:
|
|
return exc_wrap_pointer(GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, NULL))
|
|
|
|
sql_dialect_b = sql_dialect.encode('utf-8')
|
|
sql_dialect_c = sql_dialect_b
|
|
return exc_wrap_pointer(GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, sql_dialect_c))
|
|
|
|
# GDAL does not always raise exception messages in this case
|
|
except NullPointerError:
|
|
raise DataLayerError(f"Error executing sql '{sql}'") from None
|
|
|
|
except CPLE_BaseError as exc:
|
|
raise DataLayerError(str(exc))
|
|
|
|
|
|
cdef str get_crs(OGRLayerH ogr_layer):
|
|
"""Read CRS from layer as EPSG:<code> if available or WKT.
|
|
|
|
Parameters
|
|
----------
|
|
ogr_layer : pointer to open OGR layer
|
|
|
|
Returns
|
|
-------
|
|
str or None
|
|
EPSG:<code> or WKT
|
|
"""
|
|
cdef void *ogr_crs = NULL
|
|
cdef const char *authority_key = NULL
|
|
cdef const char *authority_val = NULL
|
|
cdef char *ogr_wkt = NULL
|
|
|
|
try:
|
|
ogr_crs = exc_wrap_pointer(OGR_L_GetSpatialRef(ogr_layer))
|
|
|
|
except NullPointerError:
|
|
# No coordinate system defined.
|
|
# This is expected and valid for nonspatial tables.
|
|
return None
|
|
|
|
except CPLE_BaseError as exc:
|
|
raise CRSError(str(exc))
|
|
|
|
# If CRS can be decoded to an EPSG code, use that.
|
|
# The following pointers will be NULL if it cannot be decoded.
|
|
retval = OSRAutoIdentifyEPSG(ogr_crs)
|
|
authority_key = <const char *>OSRGetAuthorityName(ogr_crs, NULL)
|
|
authority_val = <const char *>OSRGetAuthorityCode(ogr_crs, NULL)
|
|
|
|
if authority_key != NULL and authority_val != NULL:
|
|
key = get_string(authority_key)
|
|
if key == 'EPSG':
|
|
value = get_string(authority_val)
|
|
return f"EPSG:{value}"
|
|
|
|
try:
|
|
OSRExportToWkt(ogr_crs, &ogr_wkt)
|
|
if ogr_wkt == NULL:
|
|
raise CRSError("CRS could not be extracted as WKT") from None
|
|
|
|
wkt = get_string(ogr_wkt)
|
|
|
|
finally:
|
|
CPLFree(ogr_wkt)
|
|
return wkt
|
|
|
|
|
|
cdef get_driver(OGRDataSourceH ogr_dataset):
|
|
"""Get the driver for a dataset.
|
|
|
|
Parameters
|
|
----------
|
|
ogr_dataset : pointer to open OGR dataset
|
|
Returns
|
|
-------
|
|
str or None
|
|
"""
|
|
cdef void *ogr_driver
|
|
|
|
try:
|
|
ogr_driver = exc_wrap_pointer(GDALGetDatasetDriver(ogr_dataset))
|
|
|
|
except NullPointerError:
|
|
raise DataLayerError(f"Could not detect driver of dataset") from None
|
|
|
|
except CPLE_BaseError as exc:
|
|
raise DataLayerError(str(exc))
|
|
|
|
driver = OGR_Dr_GetName(ogr_driver).decode("UTF-8")
|
|
return driver
|
|
|
|
|
|
cdef get_feature_count(OGRLayerH ogr_layer, int force):
|
|
"""Get the feature count of a layer.
|
|
|
|
If GDAL returns an unknown count (-1), this iterates over every feature
|
|
to calculate the count.
|
|
|
|
Parameters
|
|
----------
|
|
ogr_layer : pointer to open OGR layer
|
|
force : bool
|
|
True if the feature count should be computed even if it is expensive
|
|
|
|
Returns
|
|
-------
|
|
int
|
|
count of features
|
|
"""
|
|
|
|
cdef OGRFeatureH ogr_feature = NULL
|
|
cdef int feature_count = OGR_L_GetFeatureCount(ogr_layer, force)
|
|
|
|
# if GDAL refuses to give us the feature count, we have to loop over all
|
|
# features ourselves and get the count. This can happen for some drivers
|
|
# (e.g., OSM) or if a where clause is invalid but not rejected as error
|
|
if force and feature_count == -1:
|
|
# make sure layer is read from beginning
|
|
OGR_L_ResetReading(ogr_layer)
|
|
|
|
feature_count = 0
|
|
while True:
|
|
try:
|
|
ogr_feature = exc_wrap_pointer(OGR_L_GetNextFeature(ogr_layer))
|
|
feature_count +=1
|
|
|
|
except NullPointerError:
|
|
# No more rows available, so stop reading
|
|
break
|
|
|
|
# driver may raise other errors, e.g., for OSM if node ids are not
|
|
# increasing, the default config option OSM_USE_CUSTOM_INDEXING=YES
|
|
# causes errors iterating over features
|
|
except CPLE_BaseError as exc:
|
|
# if an invalid where clause is used for a GPKG file, it is not
|
|
# caught as an error until attempting to iterate over features;
|
|
# catch it here
|
|
if "failed to prepare SQL" in str(exc):
|
|
raise ValueError(f"Invalid SQL query: {str(exc)}") from None
|
|
|
|
raise DataLayerError(f"Could not iterate over features: {str(exc)}") from None
|
|
|
|
finally:
|
|
if ogr_feature != NULL:
|
|
OGR_F_Destroy(ogr_feature)
|
|
ogr_feature = NULL
|
|
|
|
return feature_count
|
|
|
|
|
|
cdef get_total_bounds(OGRLayerH ogr_layer, int force):
|
|
"""Get the total bounds of a layer.
|
|
|
|
Parameters
|
|
----------
|
|
ogr_layer : pointer to open OGR layer
|
|
force : bool
|
|
True if the total bounds should be computed even if it is expensive
|
|
|
|
Returns
|
|
-------
|
|
tuple of (xmin, ymin, xmax, ymax) or None
|
|
The total bounds of the layer, or None if they could not be determined.
|
|
"""
|
|
|
|
cdef OGREnvelope ogr_envelope
|
|
try:
|
|
exc_wrap_ogrerr(OGR_L_GetExtent(ogr_layer, &ogr_envelope, force))
|
|
bounds = (
|
|
ogr_envelope.MinX, ogr_envelope.MinY, ogr_envelope.MaxX, ogr_envelope.MaxY
|
|
)
|
|
|
|
except CPLE_BaseError:
|
|
bounds = None
|
|
|
|
return bounds
|
|
|
|
|
|
cdef set_metadata(GDALMajorObjectH obj, object metadata):
|
|
"""Set metadata on a dataset or layer
|
|
|
|
Parameters
|
|
----------
|
|
obj : pointer to dataset or layer
|
|
metadata : dict, optional (default None)
|
|
keys and values must be strings
|
|
"""
|
|
|
|
cdef char **metadata_items = NULL
|
|
cdef int err = 0
|
|
|
|
metadata_items = dict_to_options(metadata)
|
|
if metadata_items != NULL:
|
|
# only default namepace is currently supported
|
|
err = GDALSetMetadata(obj, metadata_items, NULL)
|
|
|
|
CSLDestroy(metadata_items)
|
|
metadata_items = NULL
|
|
|
|
if err:
|
|
raise RuntimeError("Could not set metadata") from None
|
|
|
|
cdef get_metadata(GDALMajorObjectH obj):
|
|
"""Get metadata for a dataset or layer
|
|
|
|
Parameters
|
|
----------
|
|
obj : pointer to dataset or layer
|
|
|
|
Returns
|
|
-------
|
|
dict or None
|
|
metadata as key, value pairs
|
|
"""
|
|
# only default namespace is currently supported
|
|
cdef char **metadata = GDALGetMetadata(obj, NULL)
|
|
|
|
if metadata != NULL:
|
|
return dict(
|
|
metadata[i].decode('UTF-8').split('=', 1)
|
|
for i in range(CSLCount(metadata))
|
|
)
|
|
|
|
return None
|
|
|
|
|
|
cdef detect_encoding(OGRDataSourceH ogr_dataset, OGRLayerH ogr_layer):
|
|
"""Attempt to detect the encoding of the layer.
|
|
If it supports UTF-8, use that.
|
|
If it is a shapefile, it must otherwise be ISO-8859-1.
|
|
|
|
Parameters
|
|
----------
|
|
ogr_dataset : pointer to open OGR dataset
|
|
ogr_layer : pointer to open OGR layer
|
|
|
|
Returns
|
|
-------
|
|
str or None
|
|
"""
|
|
|
|
if OGR_L_TestCapability(ogr_layer, OLCStringsAsUTF8):
|
|
return 'UTF-8'
|
|
|
|
driver = get_driver(ogr_dataset)
|
|
if driver == 'ESRI Shapefile':
|
|
return 'ISO-8859-1'
|
|
|
|
if driver == "OSM":
|
|
# always set OSM data to UTF-8
|
|
# per https://help.openstreetmap.org/questions/2172/what-encoding-does-openstreetmap-use
|
|
return "UTF-8"
|
|
|
|
return None
|
|
|
|
|
|
cdef get_fields(OGRLayerH ogr_layer, str encoding, use_arrow=False):
|
|
"""Get field names and types for layer.
|
|
|
|
Parameters
|
|
----------
|
|
ogr_layer : pointer to open OGR layer
|
|
encoding : str
|
|
encoding to use when reading field name
|
|
use_arrow : bool, default False
|
|
If using arrow, all types are supported, and we don't have to
|
|
raise warnings
|
|
|
|
Returns
|
|
-------
|
|
ndarray(n, 4)
|
|
array of index, ogr type, name, numpy type
|
|
"""
|
|
cdef int i
|
|
cdef int field_count
|
|
cdef OGRFeatureDefnH ogr_featuredef = NULL
|
|
cdef OGRFieldDefnH ogr_fielddef = NULL
|
|
cdef int field_subtype
|
|
cdef const char *key_c
|
|
|
|
try:
|
|
ogr_featuredef = exc_wrap_pointer(OGR_L_GetLayerDefn(ogr_layer))
|
|
|
|
except NullPointerError:
|
|
raise DataLayerError("Could not get layer definition") from None
|
|
|
|
except CPLE_BaseError as exc:
|
|
raise DataLayerError(str(exc))
|
|
|
|
field_count = OGR_FD_GetFieldCount(ogr_featuredef)
|
|
|
|
fields = np.empty(shape=(field_count, 4), dtype=object)
|
|
fields_view = fields[:,:]
|
|
|
|
skipped_fields = False
|
|
|
|
for i in range(field_count):
|
|
try:
|
|
ogr_fielddef = exc_wrap_pointer(OGR_FD_GetFieldDefn(ogr_featuredef, i))
|
|
|
|
except NullPointerError:
|
|
raise FieldError(f"Could not get field definition for field at index {i}") from None
|
|
|
|
except CPLE_BaseError as exc:
|
|
raise FieldError(str(exc))
|
|
|
|
field_name = get_string(OGR_Fld_GetNameRef(ogr_fielddef), encoding=encoding)
|
|
|
|
field_type = OGR_Fld_GetType(ogr_fielddef)
|
|
np_type = FIELD_TYPES[field_type]
|
|
if not np_type and not use_arrow:
|
|
skipped_fields = True
|
|
log.warning(
|
|
f"Skipping field {field_name}: unsupported OGR type: {field_type}")
|
|
continue
|
|
|
|
field_subtype = OGR_Fld_GetSubType(ogr_fielddef)
|
|
subtype = FIELD_SUBTYPES.get(field_subtype)
|
|
if subtype is not None:
|
|
# bool, int16, float32 dtypes
|
|
np_type = subtype
|
|
|
|
fields_view[i,0] = i
|
|
fields_view[i,1] = field_type
|
|
fields_view[i,2] = field_name
|
|
fields_view[i,3] = np_type
|
|
|
|
if skipped_fields:
|
|
# filter out skipped fields
|
|
mask = np.array([idx is not None for idx in fields[:, 0]])
|
|
fields = fields[mask]
|
|
|
|
return fields
|
|
|
|
|
|
cdef apply_where_filter(OGRLayerH ogr_layer, str where):
|
|
"""Applies where filter to layer.
|
|
|
|
WARNING: GDAL does not raise an error for GPKG when SQL query is invalid
|
|
but instead only logs to stderr.
|
|
|
|
Parameters
|
|
----------
|
|
ogr_layer : pointer to open OGR layer
|
|
where : str
|
|
See http://ogdi.sourceforge.net/prop/6.2.CapabilitiesMetadata.html
|
|
restricted_where for more information about valid expressions.
|
|
|
|
Raises
|
|
------
|
|
ValueError: if SQL query is not valid
|
|
"""
|
|
|
|
where_b = where.encode('utf-8')
|
|
where_c = where_b
|
|
err = OGR_L_SetAttributeFilter(ogr_layer, where_c)
|
|
# WARNING: GDAL does not raise this error for GPKG but instead only
|
|
# logs to stderr
|
|
if err != OGRERR_NONE:
|
|
try:
|
|
exc_check()
|
|
except CPLE_BaseError as exc:
|
|
raise ValueError(str(exc))
|
|
|
|
raise ValueError(f"Invalid SQL query for layer '{OGR_L_GetName(ogr_layer)}': '{where}'")
|
|
|
|
|
|
cdef apply_bbox_filter(OGRLayerH ogr_layer, bbox):
|
|
"""Applies bounding box spatial filter to layer.
|
|
|
|
Parameters
|
|
----------
|
|
ogr_layer : pointer to open OGR layer
|
|
bbox: list or tuple of xmin, ymin, xmax, ymax
|
|
|
|
Raises
|
|
------
|
|
ValueError: if bbox is not a list or tuple or does not have proper number of
|
|
items
|
|
"""
|
|
|
|
if not (isinstance(bbox, (tuple, list)) and len(bbox) == 4):
|
|
raise ValueError(f"Invalid bbox: {bbox}")
|
|
|
|
xmin, ymin, xmax, ymax = bbox
|
|
OGR_L_SetSpatialFilterRect(ogr_layer, xmin, ymin, xmax, ymax)
|
|
|
|
|
|
cdef apply_geometry_filter(OGRLayerH ogr_layer, wkb):
|
|
"""Applies geometry spatial filter to layer.
|
|
|
|
Parameters
|
|
----------
|
|
ogr_layer : pointer to open OGR layer
|
|
wkb: WKB encoding of geometry
|
|
"""
|
|
|
|
cdef OGRGeometryH ogr_geometry = NULL
|
|
cdef unsigned char *wkb_buffer = wkb
|
|
|
|
err = OGR_G_CreateFromWkb(wkb_buffer, NULL, &ogr_geometry, len(wkb))
|
|
if err:
|
|
if ogr_geometry != NULL:
|
|
OGR_G_DestroyGeometry(ogr_geometry)
|
|
raise GeometryError("Could not create mask geometry") from None
|
|
|
|
OGR_L_SetSpatialFilter(ogr_layer, ogr_geometry)
|
|
OGR_G_DestroyGeometry(ogr_geometry)
|
|
|
|
|
|
cdef validate_feature_range(OGRLayerH ogr_layer, int skip_features=0, int max_features=0):
|
|
"""Limit skip_features and max_features to bounds available for dataset.
|
|
|
|
This is typically performed after applying where and spatial filters, which
|
|
reduce the available range of features.
|
|
|
|
Parameters
|
|
----------
|
|
ogr_layer : pointer to open OGR layer
|
|
skip_features : number of features to skip from beginning of available range
|
|
max_features : maximum number of features to read from available range
|
|
"""
|
|
|
|
feature_count = get_feature_count(ogr_layer, 1)
|
|
num_features = max_features
|
|
|
|
if feature_count == 0:
|
|
return 0, 0
|
|
|
|
if skip_features >= feature_count:
|
|
skip_features = feature_count
|
|
|
|
elif max_features == 0:
|
|
num_features = feature_count - skip_features
|
|
|
|
elif max_features > feature_count:
|
|
num_features = feature_count
|
|
|
|
return skip_features, num_features
|
|
|
|
|
|
@cython.boundscheck(False) # Deactivate bounds checking
|
|
@cython.wraparound(False) # Deactivate negative indexing.
|
|
cdef process_geometry(OGRFeatureH ogr_feature, int i, geom_view, uint8_t force_2d):
|
|
|
|
cdef OGRGeometryH ogr_geometry = NULL
|
|
cdef OGRwkbGeometryType ogr_geometry_type
|
|
|
|
cdef unsigned char *wkb = NULL
|
|
cdef int ret_length
|
|
|
|
ogr_geometry = OGR_F_GetGeometryRef(ogr_feature)
|
|
|
|
if ogr_geometry == NULL:
|
|
geom_view[i] = None
|
|
else:
|
|
try:
|
|
ogr_geometry_type = OGR_G_GetGeometryType(ogr_geometry)
|
|
|
|
# if geometry has M values, these need to be removed first
|
|
if (OGR_G_IsMeasured(ogr_geometry)):
|
|
OGR_G_SetMeasured(ogr_geometry, 0)
|
|
|
|
if force_2d and OGR_G_Is3D(ogr_geometry):
|
|
OGR_G_Set3D(ogr_geometry, 0)
|
|
|
|
# if non-linear (e.g., curve), force to linear type
|
|
if OGR_GT_IsNonLinear(ogr_geometry_type):
|
|
ogr_geometry = OGR_G_GetLinearGeometry(ogr_geometry, 0, NULL)
|
|
|
|
ret_length = OGR_G_WkbSize(ogr_geometry)
|
|
wkb = <unsigned char*>malloc(sizeof(unsigned char)*ret_length)
|
|
OGR_G_ExportToWkb(ogr_geometry, 1, wkb)
|
|
geom_view[i] = wkb[:ret_length]
|
|
|
|
finally:
|
|
free(wkb)
|
|
|
|
|
|
@cython.boundscheck(False) # Deactivate bounds checking
|
|
@cython.wraparound(False) # Deactivate negative indexing.
|
|
cdef process_fields(
|
|
OGRFeatureH ogr_feature,
|
|
int i,
|
|
int n_fields,
|
|
object field_data,
|
|
object field_data_view,
|
|
object field_indexes,
|
|
object field_ogr_types,
|
|
encoding,
|
|
bint datetime_as_string
|
|
):
|
|
cdef int j
|
|
cdef int success
|
|
cdef int field_index
|
|
cdef int ret_length
|
|
cdef GByte *bin_value
|
|
cdef int year = 0
|
|
cdef int month = 0
|
|
cdef int day = 0
|
|
cdef int hour = 0
|
|
cdef int minute = 0
|
|
cdef float fsecond = 0.0
|
|
cdef int timezone = 0
|
|
|
|
for j in range(n_fields):
|
|
field_index = field_indexes[j]
|
|
field_type = field_ogr_types[j]
|
|
data = field_data_view[j]
|
|
|
|
isnull = OGR_F_IsFieldSetAndNotNull(ogr_feature, field_index) == 0
|
|
if isnull:
|
|
if field_type in (OFTInteger, OFTInteger64, OFTReal):
|
|
# if a boolean or integer type, have to cast to float to hold
|
|
# NaN values
|
|
if data.dtype.kind in ('b', 'i', 'u'):
|
|
field_data[j] = field_data[j].astype(np.float64)
|
|
field_data_view[j] = field_data[j][:]
|
|
field_data_view[j][i] = np.nan
|
|
else:
|
|
data[i] = np.nan
|
|
|
|
elif field_type in ( OFTDate, OFTDateTime) and not datetime_as_string:
|
|
data[i] = np.datetime64('NaT')
|
|
|
|
else:
|
|
data[i] = None
|
|
|
|
continue
|
|
|
|
if field_type == OFTInteger:
|
|
data[i] = OGR_F_GetFieldAsInteger(ogr_feature, field_index)
|
|
|
|
elif field_type == OFTInteger64:
|
|
data[i] = OGR_F_GetFieldAsInteger64(ogr_feature, field_index)
|
|
|
|
elif field_type == OFTReal:
|
|
data[i] = OGR_F_GetFieldAsDouble(ogr_feature, field_index)
|
|
|
|
elif field_type == OFTString:
|
|
value = get_string(OGR_F_GetFieldAsString(ogr_feature, field_index), encoding=encoding)
|
|
data[i] = value
|
|
|
|
elif field_type == OFTBinary:
|
|
bin_value = OGR_F_GetFieldAsBinary(ogr_feature, field_index, &ret_length)
|
|
data[i] = bin_value[:ret_length]
|
|
|
|
elif field_type == OFTDateTime or field_type == OFTDate:
|
|
|
|
if datetime_as_string:
|
|
# defer datetime parsing to user/ pandas layer
|
|
# Update to OGR_F_GetFieldAsISO8601DateTime when GDAL 3.7+ only
|
|
data[i] = get_string(OGR_F_GetFieldAsString(ogr_feature, field_index), encoding=encoding)
|
|
else:
|
|
success = OGR_F_GetFieldAsDateTimeEx(
|
|
ogr_feature, field_index, &year, &month, &day, &hour, &minute, &fsecond, &timezone)
|
|
|
|
ms, ss = math.modf(fsecond)
|
|
second = int(ss)
|
|
# fsecond has millisecond accuracy
|
|
microsecond = round(ms * 1000) * 1000
|
|
|
|
if not success:
|
|
data[i] = np.datetime64('NaT')
|
|
|
|
elif field_type == OFTDate:
|
|
data[i] = datetime.date(year, month, day).isoformat()
|
|
|
|
elif field_type == OFTDateTime:
|
|
data[i] = datetime.datetime(year, month, day, hour, minute, second, microsecond).isoformat()
|
|
|
|
|
|
@cython.boundscheck(False) # Deactivate bounds checking
|
|
@cython.wraparound(False) # Deactivate negative indexing.
|
|
cdef get_features(
|
|
OGRLayerH ogr_layer,
|
|
object[:,:] fields,
|
|
encoding,
|
|
uint8_t read_geometry,
|
|
uint8_t force_2d,
|
|
int skip_features,
|
|
int num_features,
|
|
uint8_t return_fids,
|
|
bint datetime_as_string
|
|
):
|
|
|
|
cdef OGRFeatureH ogr_feature = NULL
|
|
cdef int n_fields
|
|
cdef int i
|
|
cdef int field_index
|
|
|
|
# make sure layer is read from beginning
|
|
OGR_L_ResetReading(ogr_layer)
|
|
|
|
if skip_features > 0:
|
|
OGR_L_SetNextByIndex(ogr_layer, skip_features)
|
|
|
|
if return_fids:
|
|
fid_data = np.empty(shape=(num_features), dtype=np.int64)
|
|
fid_view = fid_data[:]
|
|
else:
|
|
fid_data = None
|
|
|
|
if read_geometry:
|
|
geometries = np.empty(shape=(num_features, ), dtype='object')
|
|
geom_view = geometries[:]
|
|
|
|
else:
|
|
geometries = None
|
|
|
|
n_fields = fields.shape[0]
|
|
field_indexes = fields[:,0]
|
|
field_ogr_types = fields[:,1]
|
|
|
|
field_data = [
|
|
np.empty(shape=(num_features, ),
|
|
dtype = ("object" if datetime_as_string and
|
|
fields[field_index,3].startswith("datetime") else fields[field_index,3])
|
|
) for field_index in range(n_fields)
|
|
]
|
|
|
|
field_data_view = [field_data[field_index][:] for field_index in range(n_fields)]
|
|
|
|
if num_features == 0:
|
|
return fid_data, geometries, field_data
|
|
|
|
i = 0
|
|
while True:
|
|
try:
|
|
if num_features > 0 and i == num_features:
|
|
break
|
|
|
|
try:
|
|
ogr_feature = exc_wrap_pointer(OGR_L_GetNextFeature(ogr_layer))
|
|
|
|
except NullPointerError:
|
|
# No more rows available, so stop reading
|
|
break
|
|
|
|
except CPLE_BaseError as exc:
|
|
raise FeatureError(str(exc))
|
|
|
|
if i >= num_features:
|
|
raise FeatureError(
|
|
"GDAL returned more records than expected based on the count of "
|
|
"records that may meet your combination of filters against this "
|
|
"dataset. Please open an issue on Github "
|
|
"(https://github.com/geopandas/pyogrio/issues) to report encountering "
|
|
"this error."
|
|
) from None
|
|
|
|
if return_fids:
|
|
fid_view[i] = OGR_F_GetFID(ogr_feature)
|
|
|
|
if read_geometry:
|
|
process_geometry(ogr_feature, i, geom_view, force_2d)
|
|
|
|
process_fields(
|
|
ogr_feature, i, n_fields, field_data, field_data_view,
|
|
field_indexes, field_ogr_types, encoding, datetime_as_string
|
|
)
|
|
i += 1
|
|
finally:
|
|
if ogr_feature != NULL:
|
|
OGR_F_Destroy(ogr_feature)
|
|
ogr_feature = NULL
|
|
|
|
# There may be fewer rows available than expected from OGR_L_GetFeatureCount,
|
|
# such as features with bounding boxes that intersect the bbox
|
|
# but do not themselves intersect the bbox.
|
|
# Empty rows are dropped.
|
|
if i < num_features:
|
|
if return_fids:
|
|
fid_data = fid_data[:i]
|
|
if read_geometry:
|
|
geometries = geometries[:i]
|
|
field_data = [data_field[:i] for data_field in field_data]
|
|
|
|
return fid_data, geometries, field_data
|
|
|
|
|
|
@cython.boundscheck(False) # Deactivate bounds checking
|
|
@cython.wraparound(False) # Deactivate negative indexing.
|
|
cdef get_features_by_fid(
|
|
OGRLayerH ogr_layer,
|
|
int[:] fids,
|
|
object[:,:] fields,
|
|
encoding,
|
|
uint8_t read_geometry,
|
|
uint8_t force_2d,
|
|
bint datetime_as_string
|
|
):
|
|
|
|
cdef OGRFeatureH ogr_feature = NULL
|
|
cdef int n_fields
|
|
cdef int i
|
|
cdef int fid
|
|
cdef int field_index
|
|
cdef int count = len(fids)
|
|
|
|
# make sure layer is read from beginning
|
|
OGR_L_ResetReading(ogr_layer)
|
|
|
|
if read_geometry:
|
|
geometries = np.empty(shape=(count, ), dtype='object')
|
|
geom_view = geometries[:]
|
|
|
|
else:
|
|
geometries = None
|
|
|
|
n_fields = fields.shape[0]
|
|
field_indexes = fields[:,0]
|
|
field_ogr_types = fields[:,1]
|
|
field_data = [
|
|
np.empty(shape=(count, ),
|
|
dtype=("object" if datetime_as_string and fields[field_index,3].startswith("datetime")
|
|
else fields[field_index,3]))
|
|
for field_index in range(n_fields)
|
|
]
|
|
|
|
field_data_view = [field_data[field_index][:] for field_index in range(n_fields)]
|
|
|
|
for i in range(count):
|
|
try:
|
|
fid = fids[i]
|
|
|
|
try:
|
|
ogr_feature = exc_wrap_pointer(OGR_L_GetFeature(ogr_layer, fid))
|
|
|
|
except NullPointerError:
|
|
raise FeatureError(f"Could not read feature with fid {fid}") from None
|
|
|
|
except CPLE_BaseError as exc:
|
|
raise FeatureError(str(exc))
|
|
|
|
if read_geometry:
|
|
process_geometry(ogr_feature, i, geom_view, force_2d)
|
|
|
|
process_fields(
|
|
ogr_feature, i, n_fields, field_data, field_data_view,
|
|
field_indexes, field_ogr_types, encoding, datetime_as_string
|
|
)
|
|
finally:
|
|
if ogr_feature != NULL:
|
|
OGR_F_Destroy(ogr_feature)
|
|
ogr_feature = NULL
|
|
|
|
|
|
return (geometries, field_data)
|
|
|
|
|
|
@cython.boundscheck(False) # Deactivate bounds checking
|
|
@cython.wraparound(False) # Deactivate negative indexing.
|
|
cdef get_bounds(
|
|
OGRLayerH ogr_layer,
|
|
int skip_features,
|
|
int num_features):
|
|
|
|
cdef OGRFeatureH ogr_feature = NULL
|
|
cdef OGRGeometryH ogr_geometry = NULL
|
|
cdef OGREnvelope ogr_envelope # = NULL
|
|
cdef int i
|
|
|
|
# make sure layer is read from beginning
|
|
OGR_L_ResetReading(ogr_layer)
|
|
|
|
if skip_features > 0:
|
|
OGR_L_SetNextByIndex(ogr_layer, skip_features)
|
|
|
|
fid_data = np.empty(shape=(num_features), dtype=np.int64)
|
|
fid_view = fid_data[:]
|
|
|
|
bounds_data = np.empty(shape=(4, num_features), dtype='float64')
|
|
bounds_view = bounds_data[:]
|
|
|
|
i = 0
|
|
while True:
|
|
try:
|
|
if num_features > 0 and i == num_features:
|
|
break
|
|
|
|
try:
|
|
ogr_feature = exc_wrap_pointer(OGR_L_GetNextFeature(ogr_layer))
|
|
|
|
except NullPointerError:
|
|
# No more rows available, so stop reading
|
|
break
|
|
|
|
except CPLE_BaseError as exc:
|
|
raise FeatureError(str(exc))
|
|
|
|
if i >= num_features:
|
|
raise FeatureError(
|
|
"Reading more features than indicated by OGR_L_GetFeatureCount is not supported"
|
|
) from None
|
|
|
|
fid_view[i] = OGR_F_GetFID(ogr_feature)
|
|
|
|
ogr_geometry = OGR_F_GetGeometryRef(ogr_feature)
|
|
|
|
if ogr_geometry == NULL:
|
|
bounds_view[:,i] = np.nan
|
|
|
|
else:
|
|
OGR_G_GetEnvelope(ogr_geometry, &ogr_envelope)
|
|
bounds_view[0, i] = ogr_envelope.MinX
|
|
bounds_view[1, i] = ogr_envelope.MinY
|
|
bounds_view[2, i] = ogr_envelope.MaxX
|
|
bounds_view[3, i] = ogr_envelope.MaxY
|
|
|
|
i += 1
|
|
finally:
|
|
if ogr_feature != NULL:
|
|
OGR_F_Destroy(ogr_feature)
|
|
ogr_feature = NULL
|
|
|
|
# Less rows read than anticipated, so drop empty rows
|
|
if i < num_features:
|
|
fid_data = fid_data[:i]
|
|
bounds_data = bounds_data[:, :i]
|
|
|
|
return fid_data, bounds_data
|
|
|
|
|
|
def ogr_read(
|
|
str path,
|
|
object dataset_kwargs,
|
|
object layer=None,
|
|
object encoding=None,
|
|
int read_geometry=True,
|
|
int force_2d=False,
|
|
object columns=None,
|
|
int skip_features=0,
|
|
int max_features=0,
|
|
object where=None,
|
|
tuple bbox=None,
|
|
object mask=None,
|
|
object fids=None,
|
|
str sql=None,
|
|
str sql_dialect=None,
|
|
int return_fids=False,
|
|
bint datetime_as_string=False
|
|
):
|
|
|
|
cdef int err = 0
|
|
cdef const char *path_c = NULL
|
|
cdef char **dataset_options = NULL
|
|
cdef const char *where_c = NULL
|
|
cdef const char *field_c = NULL
|
|
cdef char **fields_c = NULL
|
|
cdef OGRDataSourceH ogr_dataset = NULL
|
|
cdef OGRLayerH ogr_layer = NULL
|
|
cdef int feature_count = 0
|
|
cdef double xmin, ymin, xmax, ymax
|
|
|
|
path_b = path.encode('utf-8')
|
|
path_c = path_b
|
|
|
|
if fids is not None:
|
|
if where is not None or bbox is not None or mask is not None or sql is not None or skip_features or max_features:
|
|
raise ValueError(
|
|
"cannot set both 'fids' and any of 'where', 'bbox', 'mask', "
|
|
"'sql', 'skip_features' or 'max_features'"
|
|
)
|
|
fids = np.asarray(fids, dtype=np.intc)
|
|
|
|
if sql is not None and layer is not None:
|
|
raise ValueError("'sql' paramater cannot be combined with 'layer'")
|
|
|
|
if not (read_geometry or return_fids or columns is None or len(columns) > 0):
|
|
raise ValueError(
|
|
"at least one of read_geometry or return_fids must be True or columns must "
|
|
"be None or non-empty"
|
|
)
|
|
|
|
if bbox and mask:
|
|
raise ValueError("cannot set both 'bbox' and 'mask'")
|
|
|
|
if skip_features < 0:
|
|
raise ValueError("'skip_features' must be >= 0")
|
|
|
|
if max_features < 0:
|
|
raise ValueError("'max_features' must be >= 0")
|
|
|
|
try:
|
|
dataset_options = dict_to_options(dataset_kwargs)
|
|
ogr_dataset = ogr_open(path_c, 0, dataset_options)
|
|
|
|
if sql is None:
|
|
# layer defaults to index 0
|
|
if layer is None:
|
|
layer = 0
|
|
ogr_layer = get_ogr_layer(ogr_dataset, layer)
|
|
else:
|
|
ogr_layer = execute_sql(ogr_dataset, sql, sql_dialect)
|
|
|
|
crs = get_crs(ogr_layer)
|
|
|
|
# Encoding is derived from the user, from the dataset capabilities / type,
|
|
# or from the system locale
|
|
encoding = (
|
|
encoding
|
|
or detect_encoding(ogr_dataset, ogr_layer)
|
|
or locale.getpreferredencoding()
|
|
)
|
|
|
|
fields = get_fields(ogr_layer, encoding)
|
|
|
|
ignored_fields = []
|
|
if columns is not None:
|
|
# Fields are matched exactly by name, duplicates are dropped.
|
|
# Find index of each field into fields
|
|
idx = np.intersect1d(fields[:,2], columns, return_indices=True)[1]
|
|
fields = fields[idx, :]
|
|
|
|
ignored_fields = list(set(fields[:,2]) - set(columns))
|
|
|
|
if not read_geometry:
|
|
ignored_fields.append("OGR_GEOMETRY")
|
|
|
|
# Instruct GDAL to ignore reading fields not
|
|
# included in output columns for faster I/O
|
|
if ignored_fields:
|
|
for field in ignored_fields:
|
|
field_b = field.encode("utf-8")
|
|
field_c = field_b
|
|
fields_c = CSLAddString(fields_c, field_c)
|
|
|
|
OGR_L_SetIgnoredFields(ogr_layer, <const char**>fields_c)
|
|
|
|
geometry_type = get_geometry_type(ogr_layer)
|
|
|
|
if fids is not None:
|
|
geometries, field_data = get_features_by_fid(
|
|
ogr_layer,
|
|
fids,
|
|
fields,
|
|
encoding,
|
|
read_geometry=read_geometry and geometry_type is not None,
|
|
force_2d=force_2d,
|
|
datetime_as_string=datetime_as_string
|
|
)
|
|
|
|
# bypass reading fids since these should match fids used for read
|
|
if return_fids:
|
|
fid_data = fids.astype(np.int64)
|
|
else:
|
|
fid_data = None
|
|
else:
|
|
# Apply the attribute filter
|
|
if where is not None and where != "":
|
|
apply_where_filter(ogr_layer, where)
|
|
|
|
# Apply the spatial filter
|
|
if bbox is not None:
|
|
apply_bbox_filter(ogr_layer, bbox)
|
|
|
|
elif mask is not None:
|
|
apply_geometry_filter(ogr_layer, mask)
|
|
|
|
# Limit feature range to available range
|
|
skip_features, num_features = validate_feature_range(
|
|
ogr_layer, skip_features, max_features
|
|
)
|
|
|
|
fid_data, geometries, field_data = get_features(
|
|
ogr_layer,
|
|
fields,
|
|
encoding,
|
|
read_geometry=read_geometry and geometry_type is not None,
|
|
force_2d=force_2d,
|
|
skip_features=skip_features,
|
|
num_features=num_features,
|
|
return_fids=return_fids,
|
|
datetime_as_string=datetime_as_string
|
|
)
|
|
|
|
meta = {
|
|
'crs': crs,
|
|
'encoding': encoding,
|
|
'fields': fields[:,2], # return only names
|
|
'dtypes':fields[:,3],
|
|
'geometry_type': geometry_type,
|
|
}
|
|
|
|
finally:
|
|
if dataset_options != NULL:
|
|
CSLDestroy(dataset_options)
|
|
dataset_options = NULL
|
|
|
|
if ogr_dataset != NULL:
|
|
if sql is not None:
|
|
GDALDatasetReleaseResultSet(ogr_dataset, ogr_layer)
|
|
|
|
GDALClose(ogr_dataset)
|
|
ogr_dataset = NULL
|
|
|
|
return (
|
|
meta,
|
|
fid_data,
|
|
geometries,
|
|
field_data
|
|
)
|
|
|
|
@contextlib.contextmanager
|
|
def ogr_open_arrow(
|
|
str path,
|
|
dataset_kwargs,
|
|
object layer=None,
|
|
object encoding=None,
|
|
int read_geometry=True,
|
|
int force_2d=False,
|
|
object columns=None,
|
|
int skip_features=0,
|
|
int max_features=0,
|
|
object where=None,
|
|
tuple bbox=None,
|
|
object mask=None,
|
|
object fids=None,
|
|
str sql=None,
|
|
str sql_dialect=None,
|
|
int return_fids=False,
|
|
int batch_size=0):
|
|
|
|
cdef int err = 0
|
|
cdef const char *path_c = NULL
|
|
cdef char **dataset_options = NULL
|
|
cdef const char *where_c = NULL
|
|
cdef OGRDataSourceH ogr_dataset = NULL
|
|
cdef OGRLayerH ogr_layer = NULL
|
|
cdef char **fields_c = NULL
|
|
cdef const char *field_c = NULL
|
|
cdef char **options = NULL
|
|
cdef ArrowArrayStream stream
|
|
cdef ArrowSchema schema
|
|
|
|
IF CTE_GDAL_VERSION < (3, 6, 0):
|
|
raise RuntimeError("Need GDAL>=3.6 for Arrow support")
|
|
|
|
path_b = path.encode('utf-8')
|
|
path_c = path_b
|
|
|
|
if force_2d:
|
|
raise ValueError("forcing 2D is not supported for Arrow")
|
|
|
|
if fids is not None:
|
|
raise ValueError("reading by FID is not supported for Arrow")
|
|
|
|
IF CTE_GDAL_VERSION < (3, 8, 0):
|
|
if skip_features:
|
|
raise ValueError(
|
|
"specifying 'skip_features' is not supported for Arrow for GDAL<3.8.0"
|
|
)
|
|
|
|
if skip_features < 0:
|
|
raise ValueError("'skip_features' must be >= 0")
|
|
|
|
if max_features:
|
|
raise ValueError(
|
|
"specifying 'max_features' is not supported for Arrow"
|
|
)
|
|
|
|
if sql is not None and layer is not None:
|
|
raise ValueError("'sql' paramater cannot be combined with 'layer'")
|
|
|
|
if not (read_geometry or return_fids or columns is None or len(columns) > 0):
|
|
raise ValueError(
|
|
"at least one of read_geometry or return_fids must be True or columns must "
|
|
"be None or non-empty"
|
|
)
|
|
|
|
if bbox and mask:
|
|
raise ValueError("cannot set both 'bbox' and 'mask'")
|
|
|
|
reader = None
|
|
try:
|
|
dataset_options = dict_to_options(dataset_kwargs)
|
|
ogr_dataset = ogr_open(path_c, 0, dataset_options)
|
|
|
|
if sql is None:
|
|
# layer defaults to index 0
|
|
if layer is None:
|
|
layer = 0
|
|
ogr_layer = get_ogr_layer(ogr_dataset, layer)
|
|
else:
|
|
ogr_layer = execute_sql(ogr_dataset, sql, sql_dialect)
|
|
|
|
crs = get_crs(ogr_layer)
|
|
|
|
# Encoding is derived from the user, from the dataset capabilities / type,
|
|
# or from the system locale
|
|
encoding = (
|
|
encoding
|
|
or detect_encoding(ogr_dataset, ogr_layer)
|
|
or locale.getpreferredencoding()
|
|
)
|
|
|
|
fields = get_fields(ogr_layer, encoding, use_arrow=True)
|
|
|
|
ignored_fields = []
|
|
if columns is not None:
|
|
# Fields are matched exactly by name, duplicates are dropped.
|
|
ignored_fields = list(set(fields[:,2]) - set(columns))
|
|
if not read_geometry:
|
|
ignored_fields.append("OGR_GEOMETRY")
|
|
|
|
geometry_type = get_geometry_type(ogr_layer)
|
|
|
|
geometry_name = get_string(OGR_L_GetGeometryColumn(ogr_layer))
|
|
|
|
fid_column = get_string(OGR_L_GetFIDColumn(ogr_layer))
|
|
# OGR_L_GetFIDColumn returns the column name if it is a custom column,
|
|
# or "" if not. For arrow, the default column name is "OGC_FID".
|
|
if fid_column == "":
|
|
fid_column = "OGC_FID"
|
|
|
|
# Apply the attribute filter
|
|
if where is not None and where != "":
|
|
apply_where_filter(ogr_layer, where)
|
|
|
|
# Apply the spatial filter
|
|
if bbox is not None:
|
|
apply_bbox_filter(ogr_layer, bbox)
|
|
|
|
elif mask is not None:
|
|
apply_geometry_filter(ogr_layer, mask)
|
|
|
|
# Limit to specified columns
|
|
if ignored_fields:
|
|
for field in ignored_fields:
|
|
field_b = field.encode("utf-8")
|
|
field_c = field_b
|
|
fields_c = CSLAddString(fields_c, field_c)
|
|
|
|
OGR_L_SetIgnoredFields(ogr_layer, <const char**>fields_c)
|
|
|
|
if not return_fids:
|
|
options = CSLSetNameValue(options, "INCLUDE_FID", "NO")
|
|
|
|
if batch_size > 0:
|
|
options = CSLSetNameValue(
|
|
options,
|
|
"MAX_FEATURES_IN_BATCH",
|
|
str(batch_size).encode('UTF-8')
|
|
)
|
|
|
|
# make sure layer is read from beginning
|
|
OGR_L_ResetReading(ogr_layer)
|
|
|
|
if not OGR_L_GetArrowStream(ogr_layer, &stream, options):
|
|
raise RuntimeError("Failed to open ArrowArrayStream from Layer")
|
|
|
|
stream_ptr = <uintptr_t> &stream
|
|
|
|
if skip_features:
|
|
# only supported for GDAL >= 3.8.0; have to do this after getting
|
|
# the Arrow stream
|
|
OGR_L_SetNextByIndex(ogr_layer, skip_features)
|
|
|
|
# stream has to be consumed before the Dataset is closed
|
|
import pyarrow as pa
|
|
reader = pa.RecordBatchStreamReader._import_from_c(stream_ptr)
|
|
|
|
meta = {
|
|
'crs': crs,
|
|
'encoding': encoding,
|
|
'fields': fields[:,2], # return only names
|
|
'geometry_type': geometry_type,
|
|
'geometry_name': geometry_name,
|
|
'fid_column': fid_column,
|
|
}
|
|
|
|
yield meta, reader
|
|
|
|
finally:
|
|
if reader is not None:
|
|
# Mark reader as closed to prevent reading batches
|
|
reader.close()
|
|
|
|
CSLDestroy(options)
|
|
if fields_c != NULL:
|
|
CSLDestroy(fields_c)
|
|
fields_c = NULL
|
|
|
|
if dataset_options != NULL:
|
|
CSLDestroy(dataset_options)
|
|
dataset_options = NULL
|
|
|
|
if ogr_dataset != NULL:
|
|
if sql is not None:
|
|
GDALDatasetReleaseResultSet(ogr_dataset, ogr_layer)
|
|
|
|
GDALClose(ogr_dataset)
|
|
ogr_dataset = NULL
|
|
|
|
def ogr_read_bounds(
|
|
str path,
|
|
object layer=None,
|
|
object encoding=None,
|
|
int read_geometry=True,
|
|
int force_2d=False,
|
|
object columns=None,
|
|
int skip_features=0,
|
|
int max_features=0,
|
|
object where=None,
|
|
tuple bbox=None,
|
|
object mask=None):
|
|
|
|
cdef int err = 0
|
|
cdef const char *path_c = NULL
|
|
cdef const char *where_c = NULL
|
|
cdef OGRDataSourceH ogr_dataset = NULL
|
|
cdef OGRLayerH ogr_layer = NULL
|
|
cdef int feature_count = 0
|
|
cdef double xmin, ymin, xmax, ymax
|
|
|
|
if bbox and mask:
|
|
raise ValueError("cannot set both 'bbox' and 'mask'")
|
|
|
|
if skip_features < 0:
|
|
raise ValueError("'skip_features' must be >= 0")
|
|
|
|
if max_features < 0:
|
|
raise ValueError("'max_features' must be >= 0")
|
|
|
|
path_b = path.encode('utf-8')
|
|
path_c = path_b
|
|
|
|
# layer defaults to index 0
|
|
if layer is None:
|
|
layer = 0
|
|
|
|
ogr_dataset = ogr_open(path_c, 0, NULL)
|
|
ogr_layer = get_ogr_layer(ogr_dataset, layer)
|
|
|
|
# Apply the attribute filter
|
|
if where is not None and where != "":
|
|
apply_where_filter(ogr_layer, where)
|
|
|
|
# Apply the spatial filter
|
|
if bbox is not None:
|
|
apply_bbox_filter(ogr_layer, bbox)
|
|
|
|
elif mask is not None:
|
|
apply_geometry_filter(ogr_layer, mask)
|
|
|
|
# Limit feature range to available range
|
|
skip_features, num_features = validate_feature_range(ogr_layer, skip_features, max_features)
|
|
|
|
return get_bounds(ogr_layer, skip_features, num_features)
|
|
|
|
|
|
def ogr_read_info(
|
|
str path,
|
|
dataset_kwargs,
|
|
object layer=None,
|
|
object encoding=None,
|
|
int force_feature_count=False,
|
|
int force_total_bounds=False):
|
|
|
|
cdef const char *path_c = NULL
|
|
cdef char **dataset_options = NULL
|
|
cdef OGRDataSourceH ogr_dataset = NULL
|
|
cdef OGRLayerH ogr_layer = NULL
|
|
|
|
path_b = path.encode('utf-8')
|
|
path_c = path_b
|
|
|
|
# layer defaults to index 0
|
|
if layer is None:
|
|
layer = 0
|
|
|
|
try:
|
|
dataset_options = dict_to_options(dataset_kwargs)
|
|
ogr_dataset = ogr_open(path_c, 0, dataset_options)
|
|
ogr_layer = get_ogr_layer(ogr_dataset, layer)
|
|
|
|
# Encoding is derived from the user, from the dataset capabilities / type,
|
|
# or from the system locale
|
|
encoding = (
|
|
encoding
|
|
or detect_encoding(ogr_dataset, ogr_layer)
|
|
or locale.getpreferredencoding()
|
|
)
|
|
|
|
fields = get_fields(ogr_layer, encoding)
|
|
|
|
meta = {
|
|
'crs': get_crs(ogr_layer),
|
|
'encoding': encoding,
|
|
'fields': fields[:,2], # return only names
|
|
'dtypes': fields[:,3],
|
|
'geometry_type': get_geometry_type(ogr_layer),
|
|
'features': get_feature_count(ogr_layer, force_feature_count),
|
|
'total_bounds': get_total_bounds(ogr_layer, force_total_bounds),
|
|
'driver': get_driver(ogr_dataset),
|
|
"capabilities": {
|
|
"random_read": OGR_L_TestCapability(ogr_layer, OLCRandomRead) == 1,
|
|
"fast_set_next_by_index": OGR_L_TestCapability(ogr_layer, OLCFastSetNextByIndex) == 1,
|
|
"fast_spatial_filter": OGR_L_TestCapability(ogr_layer, OLCFastSpatialFilter) == 1,
|
|
"fast_feature_count": OGR_L_TestCapability(ogr_layer, OLCFastFeatureCount) == 1,
|
|
"fast_total_bounds": OGR_L_TestCapability(ogr_layer, OLCFastGetExtent) == 1,
|
|
},
|
|
'layer_metadata': get_metadata(ogr_layer),
|
|
'dataset_metadata': get_metadata(ogr_dataset),
|
|
}
|
|
|
|
finally:
|
|
if dataset_options != NULL:
|
|
CSLDestroy(dataset_options)
|
|
dataset_options = NULL
|
|
|
|
if ogr_dataset != NULL:
|
|
GDALClose(ogr_dataset)
|
|
ogr_dataset = NULL
|
|
|
|
return meta
|
|
|
|
|
|
def ogr_list_layers(str path):
|
|
cdef const char *path_c = NULL
|
|
cdef const char *ogr_name = NULL
|
|
cdef OGRDataSourceH ogr_dataset = NULL
|
|
cdef OGRLayerH ogr_layer = NULL
|
|
|
|
path_b = path.encode('utf-8')
|
|
path_c = path_b
|
|
|
|
ogr_dataset = ogr_open(path_c, 0, NULL)
|
|
|
|
layer_count = GDALDatasetGetLayerCount(ogr_dataset)
|
|
|
|
data = np.empty(shape=(layer_count, 2), dtype=object)
|
|
data_view = data[:]
|
|
for i in range(layer_count):
|
|
ogr_layer = GDALDatasetGetLayer(ogr_dataset, i)
|
|
|
|
data_view[i, 0] = get_string(OGR_L_GetName(ogr_layer))
|
|
data_view[i, 1] = get_geometry_type(ogr_layer)
|
|
|
|
if ogr_dataset != NULL:
|
|
GDALClose(ogr_dataset)
|
|
ogr_dataset = NULL
|
|
|
|
return data
|
|
|
|
|
|
# NOTE: all modes are write-only
|
|
# some data sources have multiple layers
|
|
cdef void * ogr_create(const char* path_c, const char* driver_c, char** options) except NULL:
|
|
cdef void *ogr_driver = NULL
|
|
cdef OGRDataSourceH ogr_dataset = NULL
|
|
|
|
# Get the driver
|
|
try:
|
|
ogr_driver = exc_wrap_pointer(GDALGetDriverByName(driver_c))
|
|
|
|
except NullPointerError:
|
|
raise DataSourceError(f"Could not obtain driver: {driver_c.decode('utf-8')} (check that it was installed correctly into GDAL)")
|
|
|
|
except CPLE_BaseError as exc:
|
|
raise DataSourceError(str(exc))
|
|
|
|
# Create the dataset
|
|
try:
|
|
ogr_dataset = exc_wrap_pointer(GDALCreate(ogr_driver, path_c, 0, 0, 0, GDT_Unknown, options))
|
|
|
|
except NullPointerError:
|
|
raise DataSourceError(f"Failed to create dataset with driver: {path_c.decode('utf-8')} {driver_c.decode('utf-8')}") from None
|
|
|
|
except CPLE_NotSupportedError as exc:
|
|
raise DataSourceError(f"Driver {driver_c.decode('utf-8')} does not support write functionality") from None
|
|
|
|
except CPLE_BaseError as exc:
|
|
raise DataSourceError(str(exc))
|
|
|
|
return ogr_dataset
|
|
|
|
|
|
cdef void * create_crs(str crs) except NULL:
|
|
cdef char *crs_c = NULL
|
|
cdef void *ogr_crs = NULL
|
|
|
|
crs_b = crs.encode('UTF-8')
|
|
crs_c = crs_b
|
|
|
|
try:
|
|
ogr_crs = exc_wrap_pointer(OSRNewSpatialReference(NULL))
|
|
err = OSRSetFromUserInput(ogr_crs, crs_c)
|
|
if err:
|
|
raise CRSError("Could not set CRS: {}".format(crs_c.decode('UTF-8'))) from None
|
|
|
|
except CPLE_BaseError as exc:
|
|
OSRRelease(ogr_crs)
|
|
raise CRSError("Could not set CRS: {}".format(exc))
|
|
|
|
return ogr_crs
|
|
|
|
|
|
cdef infer_field_types(list dtypes):
|
|
cdef int field_type = 0
|
|
cdef int field_subtype = 0
|
|
cdef int width = 0
|
|
cdef int precision = 0
|
|
|
|
field_types = np.zeros(shape=(len(dtypes), 4), dtype=int)
|
|
field_types_view = field_types[:]
|
|
|
|
for i in range(len(dtypes)):
|
|
dtype = dtypes[i]
|
|
|
|
if dtype.name in DTYPE_OGR_FIELD_TYPES:
|
|
field_type, field_subtype = DTYPE_OGR_FIELD_TYPES[dtype.name]
|
|
field_types_view[i, 0] = field_type
|
|
field_types_view[i, 1] = field_subtype
|
|
|
|
# Determine field type from ndarray values
|
|
elif dtype == np.dtype('O'):
|
|
# Object type is ambiguous: could be a string or binary data
|
|
# TODO: handle binary or other types
|
|
# for now fall back to string (same as Geopandas)
|
|
field_types_view[i, 0] = OFTString
|
|
# Convert to unicode string then take itemsize
|
|
# TODO: better implementation of this
|
|
# width = values.astype(np.unicode_).dtype.itemsize // 4
|
|
# DO WE NEED WIDTH HERE?
|
|
|
|
elif dtype.type is np.unicode_ or dtype.type is np.string_:
|
|
field_types_view[i, 0] = OFTString
|
|
field_types_view[i, 2] = int(dtype.itemsize // 4)
|
|
|
|
elif dtype.name.startswith("datetime64"):
|
|
# datetime dtype precision is specified with eg. [ms], but this isn't
|
|
# usefull when writing to gdal.
|
|
field_type, field_subtype = DTYPE_OGR_FIELD_TYPES["datetime64"]
|
|
field_types_view[i, 0] = field_type
|
|
field_types_view[i, 1] = field_subtype
|
|
|
|
else:
|
|
raise NotImplementedError(f"field type is not supported {dtype.name} (field index: {i})")
|
|
|
|
return field_types
|
|
|
|
|
|
# TODO: set geometry and field data as memory views?
|
|
def ogr_write(
|
|
str path, str layer, str driver, geometry, fields, field_data, field_mask,
|
|
str crs, str geometry_type, str encoding, object dataset_kwargs,
|
|
object layer_kwargs, bint promote_to_multi=False, bint nan_as_null=True,
|
|
bint append=False, dataset_metadata=None, layer_metadata=None,
|
|
gdal_tz_offsets=None
|
|
):
|
|
cdef const char *path_c = NULL
|
|
cdef const char *layer_c = NULL
|
|
cdef const char *driver_c = NULL
|
|
cdef const char *crs_c = NULL
|
|
cdef const char *encoding_c = NULL
|
|
cdef char **dataset_options = NULL
|
|
cdef char **layer_options = NULL
|
|
cdef const char *ogr_name = NULL
|
|
cdef OGRDataSourceH ogr_dataset = NULL
|
|
cdef OGRLayerH ogr_layer = NULL
|
|
cdef OGRFeatureH ogr_feature = NULL
|
|
cdef OGRGeometryH ogr_geometry = NULL
|
|
cdef OGRGeometryH ogr_geometry_multi = NULL
|
|
cdef OGRFeatureDefnH ogr_featuredef = NULL
|
|
cdef OGRFieldDefnH ogr_fielddef = NULL
|
|
cdef unsigned char *wkb_buffer = NULL
|
|
cdef OGRSpatialReferenceH ogr_crs = NULL
|
|
cdef int layer_idx = -1
|
|
cdef int supports_transactions = 0
|
|
cdef OGRwkbGeometryType geometry_code
|
|
cdef int err = 0
|
|
cdef int i = 0
|
|
cdef int num_records = -1
|
|
cdef int num_field_data = len(field_data) if field_data is not None else 0
|
|
cdef int num_fields = len(fields) if fields is not None else 0
|
|
|
|
if num_fields != num_field_data:
|
|
raise ValueError("field_data array needs to be same length as fields array")
|
|
|
|
if num_fields == 0 and geometry is None:
|
|
raise ValueError("You must provide at least a geometry column or a field")
|
|
|
|
if num_fields > 0:
|
|
num_records = len(field_data[0])
|
|
for i in range(1, len(field_data)):
|
|
if len(field_data[i]) != num_records:
|
|
raise ValueError("field_data arrays must be same length")
|
|
|
|
if geometry is None:
|
|
# If no geometry data, we ignore the geometry_type and don't create a geometry
|
|
# column
|
|
geometry_type = None
|
|
else:
|
|
if num_fields > 0:
|
|
if len(geometry) != num_records:
|
|
raise ValueError(
|
|
"field_data arrays must be same length as geometry array"
|
|
)
|
|
else:
|
|
num_records = len(geometry)
|
|
|
|
if field_mask is not None:
|
|
if len(field_data) != len(field_mask):
|
|
raise ValueError("field_data and field_mask must be same length")
|
|
for i in range(0, len(field_mask)):
|
|
if field_mask[i] is not None and len(field_mask[i]) != num_records:
|
|
raise ValueError("field_mask arrays must be same length as geometry array")
|
|
else:
|
|
field_mask = [None] * num_fields
|
|
|
|
path_b = path.encode('UTF-8')
|
|
path_c = path_b
|
|
|
|
driver_b = driver.encode('UTF-8')
|
|
driver_c = driver_b
|
|
|
|
if not layer:
|
|
layer = os.path.splitext(os.path.split(path)[1])[0]
|
|
|
|
if gdal_tz_offsets is None:
|
|
gdal_tz_offsets = {}
|
|
|
|
|
|
# if shapefile, GeoJSON, or FlatGeobuf, always delete first
|
|
# for other types, check if we can create layers
|
|
# GPKG might be the only multi-layer writeable type. TODO: check this
|
|
if driver in ('ESRI Shapefile', 'GeoJSON', 'GeoJSONSeq', 'FlatGeobuf') and os.path.exists(path):
|
|
if not append:
|
|
os.unlink(path)
|
|
|
|
layer_exists = False
|
|
if os.path.exists(path):
|
|
try:
|
|
ogr_dataset = ogr_open(path_c, 1, NULL)
|
|
|
|
for i in range(GDALDatasetGetLayerCount(ogr_dataset)):
|
|
name = OGR_L_GetName(GDALDatasetGetLayer(ogr_dataset, i))
|
|
if layer == name.decode('UTF-8'):
|
|
layer_idx = i
|
|
break
|
|
|
|
if layer_idx >= 0:
|
|
layer_exists = True
|
|
|
|
if not append:
|
|
GDALDatasetDeleteLayer(ogr_dataset, layer_idx)
|
|
|
|
except DataSourceError as exc:
|
|
# open failed
|
|
if append:
|
|
raise exc
|
|
|
|
# otherwise create from scratch
|
|
os.unlink(path)
|
|
ogr_dataset = NULL
|
|
|
|
# either it didn't exist or could not open it in write mode
|
|
if ogr_dataset == NULL:
|
|
dataset_options = dict_to_options(dataset_kwargs)
|
|
ogr_dataset = ogr_create(path_c, driver_c, dataset_options)
|
|
|
|
# if we are not appending to an existing layer, we need to create
|
|
# the layer and all associated properties (CRS, field defs, etc)
|
|
create_layer = not (append and layer_exists)
|
|
|
|
### Create the layer
|
|
if create_layer:
|
|
# Create the CRS
|
|
if crs is not None:
|
|
try:
|
|
ogr_crs = create_crs(crs)
|
|
|
|
except Exception as exc:
|
|
OGRReleaseDataSource(ogr_dataset)
|
|
ogr_dataset = NULL
|
|
if dataset_options != NULL:
|
|
CSLDestroy(dataset_options)
|
|
dataset_options = NULL
|
|
raise exc
|
|
|
|
# Setup layer creation options
|
|
if not encoding:
|
|
encoding = locale.getpreferredencoding()
|
|
|
|
if driver == 'ESRI Shapefile':
|
|
# Fiona only sets encoding for shapefiles; other drivers do not support
|
|
# encoding as an option.
|
|
encoding_b = encoding.upper().encode('UTF-8')
|
|
encoding_c = encoding_b
|
|
layer_options = CSLSetNameValue(layer_options, "ENCODING", encoding_c)
|
|
|
|
# Setup other layer creation options
|
|
for k, v in layer_kwargs.items():
|
|
k = k.encode('UTF-8')
|
|
v = v.encode('UTF-8')
|
|
layer_options = CSLAddNameValue(layer_options, <const char *>k, <const char *>v)
|
|
|
|
### Get geometry type
|
|
# TODO: this is brittle for 3D / ZM / M types
|
|
# TODO: fail on M / ZM types
|
|
geometry_code = get_geometry_type_code(geometry_type)
|
|
|
|
try:
|
|
if create_layer:
|
|
layer_b = layer.encode('UTF-8')
|
|
layer_c = layer_b
|
|
|
|
ogr_layer = exc_wrap_pointer(
|
|
GDALDatasetCreateLayer(ogr_dataset, layer_c, ogr_crs,
|
|
geometry_code, layer_options))
|
|
|
|
else:
|
|
ogr_layer = exc_wrap_pointer(get_ogr_layer(ogr_dataset, layer))
|
|
|
|
# Set dataset and layer metadata
|
|
set_metadata(ogr_dataset, dataset_metadata)
|
|
set_metadata(ogr_layer, layer_metadata)
|
|
|
|
except Exception as exc:
|
|
OGRReleaseDataSource(ogr_dataset)
|
|
ogr_dataset = NULL
|
|
raise DataLayerError(str(exc))
|
|
|
|
finally:
|
|
if ogr_crs != NULL:
|
|
OSRRelease(ogr_crs)
|
|
ogr_crs = NULL
|
|
|
|
if dataset_options != NULL:
|
|
CSLDestroy(dataset_options)
|
|
dataset_options = NULL
|
|
|
|
if layer_options != NULL:
|
|
CSLDestroy(layer_options)
|
|
layer_options = NULL
|
|
|
|
### Create the fields
|
|
field_types = None
|
|
if num_fields > 0:
|
|
field_types = infer_field_types([field.dtype for field in field_data])
|
|
|
|
### Create the fields
|
|
if create_layer:
|
|
for i in range(num_fields):
|
|
field_type, field_subtype, width, precision = field_types[i]
|
|
|
|
name_b = fields[i].encode(encoding)
|
|
try:
|
|
ogr_fielddef = exc_wrap_pointer(OGR_Fld_Create(name_b, field_type))
|
|
|
|
# subtypes, see: https://gdal.org/development/rfc/rfc50_ogr_field_subtype.html
|
|
if field_subtype != OFSTNone:
|
|
OGR_Fld_SetSubType(ogr_fielddef, field_subtype)
|
|
|
|
if width:
|
|
OGR_Fld_SetWidth(ogr_fielddef, width)
|
|
|
|
# TODO: set precision
|
|
|
|
except:
|
|
if ogr_fielddef != NULL:
|
|
OGR_Fld_Destroy(ogr_fielddef)
|
|
ogr_fielddef = NULL
|
|
|
|
OGRReleaseDataSource(ogr_dataset)
|
|
ogr_dataset = NULL
|
|
raise FieldError(f"Error creating field '{fields[i]}' from field_data") from None
|
|
|
|
try:
|
|
exc_wrap_int(OGR_L_CreateField(ogr_layer, ogr_fielddef, 1))
|
|
|
|
except:
|
|
OGRReleaseDataSource(ogr_dataset)
|
|
ogr_dataset = NULL
|
|
raise FieldError(f"Error adding field '{fields[i]}' to layer") from None
|
|
|
|
finally:
|
|
if ogr_fielddef != NULL:
|
|
OGR_Fld_Destroy(ogr_fielddef)
|
|
|
|
|
|
### Create the features
|
|
ogr_featuredef = OGR_L_GetLayerDefn(ogr_layer)
|
|
|
|
supports_transactions = OGR_L_TestCapability(ogr_layer, OLCTransactions)
|
|
if supports_transactions:
|
|
start_transaction(ogr_dataset, 0)
|
|
|
|
for i in range(num_records):
|
|
try:
|
|
# create the feature
|
|
ogr_feature = OGR_F_Create(ogr_featuredef)
|
|
if ogr_feature == NULL:
|
|
raise FeatureError(f"Could not create feature at index {i}") from None
|
|
|
|
# create the geometry based on specific WKB type (there might be mixed types in geometries)
|
|
# TODO: geometry must not be null or errors
|
|
wkb = None if geometry is None else geometry[i]
|
|
if wkb is not None:
|
|
wkbtype = <int>bytearray(wkb)[1]
|
|
# may need to consider all 4 bytes: int.from_bytes(wkb[0][1:4], byteorder="little")
|
|
# use "little" if the first byte == 1
|
|
ogr_geometry = OGR_G_CreateGeometry(<OGRwkbGeometryType>wkbtype)
|
|
if ogr_geometry == NULL:
|
|
raise GeometryError(f"Could not create geometry at index {i} for WKB type {wkbtype}") from None
|
|
|
|
# import the WKB
|
|
wkb_buffer = wkb
|
|
err = OGR_G_ImportFromWkb(ogr_geometry, wkb_buffer, len(wkb))
|
|
if err:
|
|
if ogr_geometry != NULL:
|
|
OGR_G_DestroyGeometry(ogr_geometry)
|
|
ogr_geometry = NULL
|
|
raise GeometryError(f"Could not create geometry from WKB at index {i}") from None
|
|
|
|
# Convert to multi type
|
|
if promote_to_multi:
|
|
if wkbtype in (wkbPoint, wkbPoint25D, wkbPointM, wkbPointZM):
|
|
ogr_geometry = OGR_G_ForceToMultiPoint(ogr_geometry)
|
|
elif wkbtype in (wkbLineString, wkbLineString25D, wkbLineStringM, wkbLineStringZM):
|
|
ogr_geometry = OGR_G_ForceToMultiLineString(ogr_geometry)
|
|
elif wkbtype in (wkbPolygon, wkbPolygon25D, wkbPolygonM, wkbPolygonZM):
|
|
ogr_geometry = OGR_G_ForceToMultiPolygon(ogr_geometry)
|
|
|
|
# Set the geometry on the feature
|
|
# this assumes ownership of the geometry and it's cleanup
|
|
err = OGR_F_SetGeometryDirectly(ogr_feature, ogr_geometry)
|
|
if err:
|
|
raise GeometryError(f"Could not set geometry for feature at index {i}") from None
|
|
|
|
# Set field values
|
|
for field_idx in range(num_fields):
|
|
field_value = field_data[field_idx][i]
|
|
field_type = field_types[field_idx][0]
|
|
|
|
mask = field_mask[field_idx]
|
|
if mask is not None and mask[i]:
|
|
OGR_F_SetFieldNull(ogr_feature, field_idx)
|
|
|
|
elif field_type == OFTString:
|
|
# TODO: encode string using approach from _get_internal_encoding which checks layer capabilities
|
|
if (
|
|
field_value is None
|
|
or (isinstance(field_value, float) and isnan(field_value))
|
|
):
|
|
OGR_F_SetFieldNull(ogr_feature, field_idx)
|
|
|
|
else:
|
|
if not isinstance(field_value, str):
|
|
field_value = str(field_value)
|
|
|
|
try:
|
|
value_b = field_value.encode("UTF-8")
|
|
OGR_F_SetFieldString(ogr_feature, field_idx, value_b)
|
|
|
|
except AttributeError:
|
|
raise ValueError(f"Could not encode value '{field_value}' in field '{fields[field_idx]}' to string")
|
|
|
|
except Exception:
|
|
raise
|
|
|
|
elif field_type == OFTInteger:
|
|
OGR_F_SetFieldInteger(ogr_feature, field_idx, field_value)
|
|
|
|
elif field_type == OFTInteger64:
|
|
OGR_F_SetFieldInteger64(ogr_feature, field_idx, field_value)
|
|
|
|
elif field_type == OFTReal:
|
|
if nan_as_null and isnan(field_value):
|
|
OGR_F_SetFieldNull(ogr_feature, field_idx)
|
|
else:
|
|
OGR_F_SetFieldDouble(ogr_feature, field_idx, field_value)
|
|
|
|
elif field_type == OFTDate:
|
|
if np.isnat(field_value):
|
|
OGR_F_SetFieldNull(ogr_feature, field_idx)
|
|
else:
|
|
datetime = field_value.item()
|
|
OGR_F_SetFieldDateTimeEx(
|
|
ogr_feature,
|
|
field_idx,
|
|
datetime.year,
|
|
datetime.month,
|
|
datetime.day,
|
|
0,
|
|
0,
|
|
0.0,
|
|
0
|
|
)
|
|
|
|
elif field_type == OFTDateTime:
|
|
if np.isnat(field_value):
|
|
OGR_F_SetFieldNull(ogr_feature, field_idx)
|
|
else:
|
|
datetime = field_value.astype("datetime64[ms]").item()
|
|
tz_array = gdal_tz_offsets.get(fields[field_idx], None)
|
|
if tz_array is None:
|
|
gdal_tz = 0
|
|
else:
|
|
gdal_tz = tz_array[i]
|
|
OGR_F_SetFieldDateTimeEx(
|
|
ogr_feature,
|
|
field_idx,
|
|
datetime.year,
|
|
datetime.month,
|
|
datetime.day,
|
|
datetime.hour,
|
|
datetime.minute,
|
|
datetime.second + datetime.microsecond / 10**6,
|
|
gdal_tz
|
|
)
|
|
|
|
else:
|
|
raise NotImplementedError(f"OGR field type is not supported for writing: {field_type}")
|
|
|
|
|
|
# Add feature to the layer
|
|
try:
|
|
exc_wrap_int(OGR_L_CreateFeature(ogr_layer, ogr_feature))
|
|
except CPLE_BaseError as exc:
|
|
raise FeatureError(f"Could not add feature to layer at index {i}: {exc}") from None
|
|
|
|
finally:
|
|
if ogr_feature != NULL:
|
|
OGR_F_Destroy(ogr_feature)
|
|
ogr_feature = NULL
|
|
|
|
if supports_transactions:
|
|
commit_transaction(ogr_dataset)
|
|
|
|
log.info(f"Created {num_records:,} records" )
|
|
|
|
### Final cleanup
|
|
if ogr_dataset != NULL:
|
|
GDALClose(ogr_dataset)
|
|
|
|
# GDAL will set an error if there was an error writing the data source
|
|
# on close
|
|
exc = exc_check()
|
|
if exc:
|
|
raise DataSourceError(f"Failed to write features to dataset {path}; {exc}") |