that's too much!
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
135
.venv/lib/python3.12/site-packages/pyogrio/tests/conftest.py
Normal file
135
.venv/lib/python3.12/site-packages/pyogrio/tests/conftest.py
Normal file
@@ -0,0 +1,135 @@
|
||||
from pathlib import Path
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
|
||||
import pytest
|
||||
|
||||
from pyogrio import (
|
||||
__gdal_version_string__,
|
||||
__version__,
|
||||
list_drivers,
|
||||
)
|
||||
from pyogrio._compat import HAS_ARROW_API, HAS_GDAL_GEOS, HAS_SHAPELY
|
||||
from pyogrio.raw import read, write
|
||||
|
||||
|
||||
_data_dir = Path(__file__).parent.resolve() / "fixtures"
|
||||
|
||||
# mapping of driver extension to driver name for well-supported drivers
|
||||
DRIVERS = {
|
||||
".fgb": "FlatGeobuf",
|
||||
".geojson": "GeoJSON",
|
||||
".geojsonl": "GeoJSONSeq",
|
||||
".geojsons": "GeoJSONSeq",
|
||||
".gpkg": "GPKG",
|
||||
".shp": "ESRI Shapefile",
|
||||
}
|
||||
|
||||
# mapping of driver name to extension
|
||||
DRIVER_EXT = {driver: ext for ext, driver in DRIVERS.items()}
|
||||
|
||||
ALL_EXTS = [".fgb", ".geojson", ".geojsonl", ".gpkg", ".shp"]
|
||||
|
||||
|
||||
def pytest_report_header(config):
|
||||
drivers = ", ".join(
|
||||
f"{driver}({capability})"
|
||||
for driver, capability in sorted(list_drivers().items())
|
||||
)
|
||||
return (
|
||||
f"pyogrio {__version__}\n"
|
||||
f"GDAL {__gdal_version_string__}\n"
|
||||
f"Supported drivers: {drivers}"
|
||||
)
|
||||
|
||||
|
||||
# marks to skip tests if optional dependecies are not present
|
||||
requires_arrow_api = pytest.mark.skipif(
|
||||
not HAS_ARROW_API, reason="GDAL>=3.6 and pyarrow required"
|
||||
)
|
||||
|
||||
requires_gdal_geos = pytest.mark.skipif(
|
||||
not HAS_GDAL_GEOS, reason="GDAL compiled with GEOS required"
|
||||
)
|
||||
|
||||
requires_shapely = pytest.mark.skipif(not HAS_SHAPELY, reason="Shapely >= 2.0 required")
|
||||
|
||||
|
||||
def prepare_testfile(testfile_path, dst_dir, ext):
|
||||
if ext == testfile_path.suffix:
|
||||
return testfile_path
|
||||
|
||||
dst_path = dst_dir / f"{testfile_path.stem}{ext}"
|
||||
if dst_path.exists():
|
||||
return dst_path
|
||||
|
||||
meta, _, geometry, field_data = read(testfile_path)
|
||||
|
||||
if ext == ".fgb":
|
||||
# For .fgb, spatial_index=False to avoid the rows being reordered
|
||||
meta["spatial_index"] = False
|
||||
# allow mixed Polygons/MultiPolygons type
|
||||
meta["geometry_type"] = "Unknown"
|
||||
|
||||
elif ext == ".gpkg":
|
||||
# For .gpkg, spatial_index=False to avoid the rows being reordered
|
||||
meta["spatial_index"] = False
|
||||
meta["geometry_type"] = "MultiPolygon"
|
||||
|
||||
write(dst_path, geometry, field_data, **meta)
|
||||
return dst_path
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def data_dir():
|
||||
return _data_dir
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def naturalearth_lowres(tmp_path, request):
|
||||
ext = getattr(request, "param", ".shp")
|
||||
testfile_path = _data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp")
|
||||
|
||||
return prepare_testfile(testfile_path, tmp_path, ext)
|
||||
|
||||
|
||||
@pytest.fixture(scope="function", params=ALL_EXTS)
|
||||
def naturalearth_lowres_all_ext(tmp_path, naturalearth_lowres, request):
|
||||
return prepare_testfile(naturalearth_lowres, tmp_path, request.param)
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def naturalearth_lowres_vsi(tmp_path, naturalearth_lowres):
|
||||
"""Wrap naturalearth_lowres as a zip file for vsi tests"""
|
||||
|
||||
path = tmp_path / f"{naturalearth_lowres.name}.zip"
|
||||
with ZipFile(path, mode="w", compression=ZIP_DEFLATED, compresslevel=5) as out:
|
||||
for ext in ["dbf", "prj", "shp", "shx"]:
|
||||
filename = f"{naturalearth_lowres.stem}.{ext}"
|
||||
out.write(naturalearth_lowres.parent / filename, filename)
|
||||
|
||||
return path, f"/vsizip/{path}/{naturalearth_lowres.name}"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def test_fgdb_vsi():
|
||||
return f"/vsizip/{_data_dir}/test_fgdb.gdb.zip"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def test_gpkg_nulls():
|
||||
return _data_dir / "test_gpkg_nulls.gpkg"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def test_ogr_types_list():
|
||||
return _data_dir / "test_ogr_types_list.geojson"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def test_datetime():
|
||||
return _data_dir / "test_datetime.geojson"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def test_datetime_tz():
|
||||
return _data_dir / "test_datetime_tz.geojson"
|
||||
89
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/README.md
vendored
Normal file
89
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/README.md
vendored
Normal file
@@ -0,0 +1,89 @@
|
||||
# Test datasets
|
||||
|
||||
## Natural Earth lowres
|
||||
|
||||
`naturalearth_lowres.shp` was copied from GeoPandas.
|
||||
|
||||
## FGDB test dataset
|
||||
|
||||
`test_fgdb.gdb.zip`
|
||||
Downloaded from http://trac.osgeo.org/gdal/raw-attachment/wiki/FileGDB/test_fgdb.gdb.zip
|
||||
|
||||
### GPKG test dataset with null values
|
||||
|
||||
`test_gpkg_nulls.gpkg` was created using Fiona backend to GeoPandas:
|
||||
|
||||
```
|
||||
from collections import OrderedDict
|
||||
|
||||
import fiona
|
||||
import geopandas as gp
|
||||
import numpy as np
|
||||
from pyogrio import write_dataframe
|
||||
|
||||
filename = "test_gpkg_nulls.gpkg"
|
||||
|
||||
df = gp.GeoDataFrame(
|
||||
{
|
||||
"col_bool": np.array([True, False, True], dtype="bool"),
|
||||
"col_int8": np.array([1, 2, 3], dtype="int8"),
|
||||
"col_int16": np.array([1, 2, 3], dtype="int16"),
|
||||
"col_int32": np.array([1, 2, 3], dtype="int32"),
|
||||
"col_int64": np.array([1, 2, 3], dtype="int64"),
|
||||
"col_uint8": np.array([1, 2, 3], dtype="uint8"),
|
||||
"col_uint16": np.array([1, 2, 3], dtype="uint16"),
|
||||
"col_uint32": np.array([1, 2, 3], dtype="uint32"),
|
||||
"col_uint64": np.array([1, 2, 3], dtype="uint64"),
|
||||
"col_float32": np.array([1.5, 2.5, 3.5], dtype="float32"),
|
||||
"col_float64": np.array([1.5, 2.5, 3.5], dtype="float64"),
|
||||
},
|
||||
geometry=gp.points_from_xy([0, 1, 2], [0, 1, 2]),
|
||||
crs="EPSG:4326",
|
||||
)
|
||||
|
||||
write_dataframe(df, filename)
|
||||
|
||||
# construct row with null values
|
||||
# Note: np.nan can only be used for float values
|
||||
null_row = {
|
||||
"type": "Fetaure",
|
||||
"id": 4,
|
||||
"properties": OrderedDict(
|
||||
[
|
||||
("col_bool", None),
|
||||
("col_int8", None),
|
||||
("col_int16", None),
|
||||
("col_int32", None),
|
||||
("col_int64", None),
|
||||
("col_uint8", None),
|
||||
("col_uint16", None),
|
||||
("col_uint32", None),
|
||||
("col_uint64", None),
|
||||
("col_float32", np.nan),
|
||||
("col_float64", np.nan),
|
||||
]
|
||||
),
|
||||
"geometry": {"type": "Point", "coordinates": (4.0, 4.0)},
|
||||
}
|
||||
|
||||
# append row with nulls to GPKG
|
||||
with fiona.open(filename, "a") as c:
|
||||
c.write(null_row)
|
||||
```
|
||||
|
||||
NOTE: Reading boolean values into GeoPandas using Fiona backend treats those
|
||||
values as `None` and column dtype as `object`; Pyogrio treats those values as
|
||||
`np.nan` and column dtype as `float64`.
|
||||
|
||||
### GPKG test with MultiSurface
|
||||
|
||||
This was extracted from https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_0308_HU4_GDB.zip
|
||||
`NHDWaterbody` layer using ogr2ogr:
|
||||
|
||||
```bash
|
||||
ogr2ogr test_mixed_surface.gpkg NHDPLUS_H_0308_HU4_GDB.gdb NHDWaterbody -where '"NHDPlusID" = 15000300070477' -select "NHDPlusID"
|
||||
```
|
||||
|
||||
### OSM PBF test
|
||||
|
||||
This was downloaded from https://github.com/openstreetmap/OSM-binary/blob/master/resources/sample.pbf
|
||||
@@ -0,0 +1 @@
|
||||
ISO-8859-1
|
||||
Binary file not shown.
@@ -0,0 +1 @@
|
||||
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
|
||||
Binary file not shown.
Binary file not shown.
BIN
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/sample.osm.pbf
vendored
Normal file
BIN
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/sample.osm.pbf
vendored
Normal file
Binary file not shown.
7
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/test_datetime.geojson
vendored
Normal file
7
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/test_datetime.geojson
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"type": "FeatureCollection",
|
||||
"features": [
|
||||
{ "type": "Feature", "properties": { "col": "2020-01-01T09:00:00.123" }, "geometry": { "type": "Point", "coordinates": [ 1.0, 1.0 ] } },
|
||||
{ "type": "Feature", "properties": { "col": "2020-01-01T10:00:00" }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } }
|
||||
]
|
||||
}
|
||||
8
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/test_datetime_tz.geojson
vendored
Normal file
8
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/test_datetime_tz.geojson
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"type": "FeatureCollection",
|
||||
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
|
||||
"features": [
|
||||
{ "type": "Feature", "properties": { "datetime_col": "2020-01-01T09:00:00.123-05:00" }, "geometry": { "type": "Point", "coordinates": [ 1.0, 1.0 ] } },
|
||||
{ "type": "Feature", "properties": { "datetime_col": "2020-01-01T10:00:00-05:00" }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } }
|
||||
]
|
||||
}
|
||||
BIN
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/test_fgdb.gdb.zip
vendored
Normal file
BIN
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/test_fgdb.gdb.zip
vendored
Normal file
Binary file not shown.
BIN
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/test_gpkg_nulls.gpkg
vendored
Normal file
BIN
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/test_gpkg_nulls.gpkg
vendored
Normal file
Binary file not shown.
BIN
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/test_multisurface.gpkg
vendored
Normal file
BIN
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/test_multisurface.gpkg
vendored
Normal file
Binary file not shown.
18
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/test_nested.geojson
vendored
Normal file
18
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/test_nested.geojson
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"type": "FeatureCollection",
|
||||
"features": [
|
||||
{
|
||||
"type": "Feature",
|
||||
"geometry": {
|
||||
"type": "Point",
|
||||
"coordinates": [0, 0]
|
||||
},
|
||||
"properties": {
|
||||
"top_level": "A",
|
||||
"intermediate_level": {
|
||||
"bottom_level": "B"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
12
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/test_ogr_types_list.geojson
vendored
Normal file
12
.venv/lib/python3.12/site-packages/pyogrio/tests/fixtures/test_ogr_types_list.geojson
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"type": "FeatureCollection",
|
||||
"name": "test",
|
||||
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
|
||||
"features": [
|
||||
{ "type": "Feature", "properties": { "int64": 1, "list_int64": [ 0, 1 ] }, "geometry": { "type": "Point", "coordinates": [ 0.0, 2.0 ] } },
|
||||
{ "type": "Feature", "properties": { "int64": 2, "list_int64": [ 2, 3 ] }, "geometry": { "type": "Point", "coordinates": [ 1.0, 2.0 ] } },
|
||||
{ "type": "Feature", "properties": { "int64": 3, "list_int64": [ 4, 5 ] }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } },
|
||||
{ "type": "Feature", "properties": { "int64": 4, "list_int64": [ 6, 7 ] }, "geometry": { "type": "Point", "coordinates": [ 3.0, 2.0 ] } },
|
||||
{ "type": "Feature", "properties": { "int64": 5, "list_int64": [ 8, 9 ] }, "geometry": { "type": "Point", "coordinates": [ 4.0, 2.0 ] } }
|
||||
]
|
||||
}
|
||||
207
.venv/lib/python3.12/site-packages/pyogrio/tests/test_arrow.py
Normal file
207
.venv/lib/python3.12/site-packages/pyogrio/tests/test_arrow.py
Normal file
@@ -0,0 +1,207 @@
|
||||
import contextlib
|
||||
import math
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from pyogrio import __gdal_version__, read_dataframe
|
||||
from pyogrio.raw import open_arrow, read_arrow
|
||||
from pyogrio.tests.conftest import requires_arrow_api
|
||||
|
||||
try:
|
||||
import pandas as pd
|
||||
from pandas.testing import assert_frame_equal, assert_index_equal
|
||||
from geopandas.testing import assert_geodataframe_equal
|
||||
|
||||
import pyarrow
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# skip all tests in this file if Arrow API or GeoPandas are unavailable
|
||||
pytestmark = requires_arrow_api
|
||||
pytest.importorskip("geopandas")
|
||||
|
||||
|
||||
def test_read_arrow(naturalearth_lowres_all_ext):
|
||||
result = read_dataframe(naturalearth_lowres_all_ext, use_arrow=True)
|
||||
expected = read_dataframe(naturalearth_lowres_all_ext, use_arrow=False)
|
||||
|
||||
if naturalearth_lowres_all_ext.suffix.startswith(".geojson"):
|
||||
check_less_precise = True
|
||||
else:
|
||||
check_less_precise = False
|
||||
assert_geodataframe_equal(result, expected, check_less_precise=check_less_precise)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("skip_features, expected", [(10, 167), (200, 0)])
|
||||
def test_read_arrow_skip_features(naturalearth_lowres, skip_features, expected):
|
||||
table = read_arrow(naturalearth_lowres, skip_features=skip_features)[1]
|
||||
assert len(table) == expected
|
||||
|
||||
|
||||
def test_read_arrow_negative_skip_features(naturalearth_lowres):
|
||||
with pytest.raises(ValueError, match="'skip_features' must be >= 0"):
|
||||
read_arrow(naturalearth_lowres, skip_features=-1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"max_features, expected", [(0, 0), (10, 10), (200, 177), (100000, 177)]
|
||||
)
|
||||
def test_read_arrow_max_features(naturalearth_lowres, max_features, expected):
|
||||
table = read_arrow(naturalearth_lowres, max_features=max_features)[1]
|
||||
assert len(table) == expected
|
||||
|
||||
|
||||
def test_read_arrow_negative_max_features(naturalearth_lowres):
|
||||
with pytest.raises(ValueError, match="'max_features' must be >= 0"):
|
||||
read_arrow(naturalearth_lowres, max_features=-1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"skip_features, max_features, expected",
|
||||
[
|
||||
(0, 0, 0),
|
||||
(10, 0, 0),
|
||||
(200, 0, 0),
|
||||
(1, 200, 176),
|
||||
(176, 10, 1),
|
||||
(100, 100, 77),
|
||||
(100, 100000, 77),
|
||||
],
|
||||
)
|
||||
def test_read_arrow_skip_features_max_features(
|
||||
naturalearth_lowres, skip_features, max_features, expected
|
||||
):
|
||||
table = read_arrow(
|
||||
naturalearth_lowres, skip_features=skip_features, max_features=max_features
|
||||
)[1]
|
||||
assert len(table) == expected
|
||||
|
||||
|
||||
def test_read_arrow_fid(naturalearth_lowres_all_ext):
|
||||
kwargs = {"use_arrow": True, "where": "fid >= 2 AND fid <= 3"}
|
||||
|
||||
df = read_dataframe(naturalearth_lowres_all_ext, fid_as_index=False, **kwargs)
|
||||
assert_index_equal(df.index, pd.RangeIndex(0, 2))
|
||||
|
||||
df = read_dataframe(naturalearth_lowres_all_ext, fid_as_index=True, **kwargs)
|
||||
assert_index_equal(df.index, pd.Index([2, 3], name="fid"))
|
||||
|
||||
|
||||
def test_read_arrow_columns(naturalearth_lowres):
|
||||
result = read_dataframe(naturalearth_lowres, use_arrow=True, columns=["continent"])
|
||||
assert result.columns.tolist() == ["continent", "geometry"]
|
||||
|
||||
|
||||
def test_read_arrow_ignore_geometry(naturalearth_lowres):
|
||||
result = read_dataframe(naturalearth_lowres, use_arrow=True, read_geometry=False)
|
||||
assert type(result) is pd.DataFrame
|
||||
|
||||
expected = read_dataframe(naturalearth_lowres, use_arrow=True).drop(
|
||||
columns=["geometry"]
|
||||
)
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_read_arrow_nested_types(test_ogr_types_list):
|
||||
# with arrow, list types are supported
|
||||
result = read_dataframe(test_ogr_types_list, use_arrow=True)
|
||||
assert "list_int64" in result.columns
|
||||
assert result["list_int64"][0].tolist() == [0, 1]
|
||||
|
||||
|
||||
def test_read_arrow_to_pandas_kwargs(test_fgdb_vsi):
|
||||
# with arrow, list types are supported
|
||||
arrow_to_pandas_kwargs = {"strings_to_categorical": True}
|
||||
result = read_dataframe(
|
||||
test_fgdb_vsi,
|
||||
use_arrow=True,
|
||||
arrow_to_pandas_kwargs=arrow_to_pandas_kwargs,
|
||||
)
|
||||
assert "SEGMENT_NAME" in result.columns
|
||||
assert result["SEGMENT_NAME"].dtype.name == "category"
|
||||
|
||||
|
||||
def test_read_arrow_raw(naturalearth_lowres):
|
||||
meta, table = read_arrow(naturalearth_lowres)
|
||||
assert isinstance(meta, dict)
|
||||
assert isinstance(table, pyarrow.Table)
|
||||
|
||||
|
||||
def test_open_arrow(naturalearth_lowres):
|
||||
with open_arrow(naturalearth_lowres) as (meta, reader):
|
||||
assert isinstance(meta, dict)
|
||||
assert isinstance(reader, pyarrow.RecordBatchReader)
|
||||
assert isinstance(reader.read_all(), pyarrow.Table)
|
||||
|
||||
|
||||
def test_open_arrow_batch_size(naturalearth_lowres):
|
||||
meta, table = read_arrow(naturalearth_lowres)
|
||||
batch_size = math.ceil(len(table) / 2)
|
||||
|
||||
with open_arrow(naturalearth_lowres, batch_size=batch_size) as (meta, reader):
|
||||
assert isinstance(meta, dict)
|
||||
assert isinstance(reader, pyarrow.RecordBatchReader)
|
||||
count = 0
|
||||
tables = []
|
||||
for table in reader:
|
||||
tables.append(table)
|
||||
count += 1
|
||||
|
||||
assert count == 2, "Should be two batches given the batch_size parameter"
|
||||
assert len(tables[0]) == batch_size, "First table should match the batch size"
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
__gdal_version__ >= (3, 8, 0),
|
||||
reason="skip_features supported by Arrow stream API for GDAL>=3.8.0",
|
||||
)
|
||||
@pytest.mark.parametrize("skip_features", [10, 200])
|
||||
def test_open_arrow_skip_features_unsupported(naturalearth_lowres, skip_features):
|
||||
"""skip_features are not supported for the Arrow stream interface for
|
||||
GDAL < 3.8.0"""
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match="specifying 'skip_features' is not supported for Arrow for GDAL<3.8.0",
|
||||
):
|
||||
with open_arrow(naturalearth_lowres, skip_features=skip_features) as (
|
||||
meta,
|
||||
reader,
|
||||
):
|
||||
pass
|
||||
|
||||
|
||||
@pytest.mark.parametrize("max_features", [10, 200])
|
||||
def test_open_arrow_max_features_unsupported(naturalearth_lowres, max_features):
|
||||
"""max_features are not supported for the Arrow stream interface"""
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match="specifying 'max_features' is not supported for Arrow",
|
||||
):
|
||||
with open_arrow(naturalearth_lowres, max_features=max_features) as (
|
||||
meta,
|
||||
reader,
|
||||
):
|
||||
pass
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def use_arrow_context():
|
||||
original = os.environ.get("PYOGRIO_USE_ARROW", None)
|
||||
os.environ["PYOGRIO_USE_ARROW"] = "1"
|
||||
yield
|
||||
if original:
|
||||
os.environ["PYOGRIO_USE_ARROW"] = original
|
||||
else:
|
||||
del os.environ["PYOGRIO_USE_ARROW"]
|
||||
|
||||
|
||||
def test_enable_with_environment_variable(test_ogr_types_list):
|
||||
# list types are only supported with arrow, so don't work by default and work
|
||||
# when arrow is enabled through env variable
|
||||
result = read_dataframe(test_ogr_types_list)
|
||||
assert "list_int64" not in result.columns
|
||||
|
||||
with use_arrow_context():
|
||||
result = read_dataframe(test_ogr_types_list)
|
||||
assert "list_int64" in result.columns
|
||||
496
.venv/lib/python3.12/site-packages/pyogrio/tests/test_core.py
Normal file
496
.venv/lib/python3.12/site-packages/pyogrio/tests/test_core.py
Normal file
@@ -0,0 +1,496 @@
|
||||
import numpy as np
|
||||
from numpy import array_equal, allclose
|
||||
import pytest
|
||||
|
||||
from pyogrio import (
|
||||
__gdal_version__,
|
||||
__gdal_geos_version__,
|
||||
list_drivers,
|
||||
list_layers,
|
||||
read_bounds,
|
||||
read_info,
|
||||
set_gdal_config_options,
|
||||
get_gdal_config_option,
|
||||
get_gdal_data_path,
|
||||
)
|
||||
from pyogrio.core import detect_write_driver
|
||||
from pyogrio.errors import DataSourceError, DataLayerError
|
||||
from pyogrio.tests.conftest import HAS_SHAPELY, prepare_testfile
|
||||
|
||||
from pyogrio._env import GDALEnv
|
||||
|
||||
with GDALEnv():
|
||||
# NOTE: this must be AFTER above imports, which init the GDAL and PROJ data
|
||||
# search paths
|
||||
from pyogrio._ogr import ogr_driver_supports_write, has_gdal_data, has_proj_data
|
||||
|
||||
|
||||
try:
|
||||
import shapely
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
def test_gdal_data():
|
||||
# test will fail if GDAL data files cannot be found, indicating an
|
||||
# installation error
|
||||
assert has_gdal_data()
|
||||
|
||||
|
||||
def test_proj_data():
|
||||
# test will fail if PROJ data files cannot be found, indicating an
|
||||
# installation error
|
||||
assert has_proj_data()
|
||||
|
||||
|
||||
def test_get_gdal_data_path():
|
||||
# test will fail if the function returns None, which means that GDAL
|
||||
# cannot find data files, indicating an installation error
|
||||
assert isinstance(get_gdal_data_path(), str)
|
||||
|
||||
|
||||
def test_gdal_geos_version():
|
||||
assert __gdal_geos_version__ is None or isinstance(__gdal_geos_version__, tuple)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"path,expected",
|
||||
[
|
||||
("test.shp", "ESRI Shapefile"),
|
||||
("test.shp.zip", "ESRI Shapefile"),
|
||||
("test.geojson", "GeoJSON"),
|
||||
("test.geojsonl", "GeoJSONSeq"),
|
||||
("test.gpkg", "GPKG"),
|
||||
pytest.param(
|
||||
"test.gpkg.zip",
|
||||
"GPKG",
|
||||
marks=pytest.mark.skipif(
|
||||
__gdal_version__ < (3, 7, 0),
|
||||
reason="writing *.gpkg.zip requires GDAL >= 3.7.0",
|
||||
),
|
||||
),
|
||||
# postgres can be detected by prefix instead of extension
|
||||
pytest.param(
|
||||
"PG:dbname=test",
|
||||
"PostgreSQL",
|
||||
marks=pytest.mark.skipif(
|
||||
"PostgreSQL" not in list_drivers(),
|
||||
reason="PostgreSQL path test requires PostgreSQL driver",
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_detect_write_driver(path, expected):
|
||||
assert detect_write_driver(path) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"path",
|
||||
[
|
||||
"test.svg", # only supports read
|
||||
"test.", # not a valid extension
|
||||
"test", # no extension or prefix
|
||||
"test.foo", # not a valid extension
|
||||
"FOO:test", # not a valid prefix
|
||||
],
|
||||
)
|
||||
def test_detect_write_driver_unsupported(path):
|
||||
with pytest.raises(ValueError, match="Could not infer driver from path"):
|
||||
detect_write_driver(path)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("path", ["test.xml", "test.txt"])
|
||||
def test_detect_write_driver_multiple_unsupported(path):
|
||||
with pytest.raises(ValueError, match="multiple drivers are available"):
|
||||
detect_write_driver(path)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"driver,expected",
|
||||
[
|
||||
# drivers known to be well-supported by pyogrio
|
||||
("ESRI Shapefile", True),
|
||||
("GeoJSON", True),
|
||||
("GeoJSONSeq", True),
|
||||
("GPKG", True),
|
||||
# drivers not supported for write by GDAL
|
||||
("HTTP", False),
|
||||
("OAPIF", False),
|
||||
],
|
||||
)
|
||||
def test_ogr_driver_supports_write(driver, expected):
|
||||
assert ogr_driver_supports_write(driver) == expected
|
||||
|
||||
|
||||
def test_list_drivers():
|
||||
all_drivers = list_drivers()
|
||||
|
||||
# verify that the core drivers are present
|
||||
for name in ("ESRI Shapefile", "GeoJSON", "GeoJSONSeq", "GPKG", "OpenFileGDB"):
|
||||
assert name in all_drivers
|
||||
|
||||
expected_capability = "rw"
|
||||
if name == "OpenFileGDB" and __gdal_version__ < (3, 6, 0):
|
||||
expected_capability = "r"
|
||||
|
||||
assert all_drivers[name] == expected_capability
|
||||
|
||||
drivers = list_drivers(read=True)
|
||||
expected = {k: v for k, v in all_drivers.items() if v.startswith("r")}
|
||||
assert len(drivers) == len(expected)
|
||||
|
||||
drivers = list_drivers(write=True)
|
||||
expected = {k: v for k, v in all_drivers.items() if v.endswith("w")}
|
||||
assert len(drivers) == len(expected)
|
||||
|
||||
drivers = list_drivers(read=True, write=True)
|
||||
expected = {
|
||||
k: v for k, v in all_drivers.items() if v.startswith("r") and v.endswith("w")
|
||||
}
|
||||
assert len(drivers) == len(expected)
|
||||
|
||||
|
||||
def test_list_layers(naturalearth_lowres, naturalearth_lowres_vsi, test_fgdb_vsi):
|
||||
assert array_equal(
|
||||
list_layers(naturalearth_lowres), [["naturalearth_lowres", "Polygon"]]
|
||||
)
|
||||
|
||||
assert array_equal(
|
||||
list_layers(naturalearth_lowres_vsi[1]), [["naturalearth_lowres", "Polygon"]]
|
||||
)
|
||||
|
||||
# Measured 3D is downgraded to plain 3D during read
|
||||
# Make sure this warning is raised
|
||||
with pytest.warns(
|
||||
UserWarning, match=r"Measured \(M\) geometry types are not supported"
|
||||
):
|
||||
fgdb_layers = list_layers(test_fgdb_vsi)
|
||||
# GDAL >= 3.4.0 includes 'another_relationship' layer
|
||||
assert len(fgdb_layers) >= 7
|
||||
|
||||
# Make sure that nonspatial layer has None for geometry
|
||||
assert array_equal(fgdb_layers[0], ["basetable_2", None])
|
||||
|
||||
# Confirm that measured 3D is downgraded to plain 3D during read
|
||||
assert array_equal(fgdb_layers[3], ["test_lines", "MultiLineString Z"])
|
||||
assert array_equal(fgdb_layers[6], ["test_areas", "MultiPolygon Z"])
|
||||
|
||||
|
||||
def test_read_bounds(naturalearth_lowres):
|
||||
fids, bounds = read_bounds(naturalearth_lowres)
|
||||
assert fids.shape == (177,)
|
||||
assert bounds.shape == (4, 177)
|
||||
|
||||
assert fids[0] == 0
|
||||
# Fiji; wraps antimeridian
|
||||
assert allclose(bounds[:, 0], [-180.0, -18.28799, 180.0, -16.02088])
|
||||
|
||||
|
||||
def test_read_bounds_max_features(naturalearth_lowres):
|
||||
bounds = read_bounds(naturalearth_lowres, max_features=2)[1]
|
||||
assert bounds.shape == (4, 2)
|
||||
|
||||
|
||||
def test_read_bounds_negative_max_features(naturalearth_lowres):
|
||||
with pytest.raises(ValueError, match="'max_features' must be >= 0"):
|
||||
read_bounds(naturalearth_lowres, max_features=-1)
|
||||
|
||||
|
||||
def test_read_bounds_skip_features(naturalearth_lowres):
|
||||
expected_bounds = read_bounds(naturalearth_lowres, max_features=11)[1][:, 10]
|
||||
fids, bounds = read_bounds(naturalearth_lowres, skip_features=10)
|
||||
assert bounds.shape == (4, 167)
|
||||
assert allclose(bounds[:, 0], expected_bounds)
|
||||
assert fids[0] == 10
|
||||
|
||||
|
||||
def test_read_bounds_negative_skip_features(naturalearth_lowres):
|
||||
with pytest.raises(ValueError, match="'skip_features' must be >= 0"):
|
||||
read_bounds(naturalearth_lowres, skip_features=-1)
|
||||
|
||||
|
||||
def test_read_bounds_where_invalid(naturalearth_lowres_all_ext):
|
||||
with pytest.raises(ValueError, match="Invalid SQL"):
|
||||
read_bounds(naturalearth_lowres_all_ext, where="invalid")
|
||||
|
||||
|
||||
def test_read_bounds_where(naturalearth_lowres):
|
||||
fids, bounds = read_bounds(naturalearth_lowres, where="iso_a3 = 'CAN'")
|
||||
assert fids.shape == (1,)
|
||||
assert bounds.shape == (4, 1)
|
||||
assert fids[0] == 3
|
||||
assert allclose(bounds[:, 0], [-140.99778, 41.675105, -52.648099, 83.23324])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("bbox", [(1,), (1, 2), (1, 2, 3)])
|
||||
def test_read_bounds_bbox_invalid(naturalearth_lowres, bbox):
|
||||
with pytest.raises(ValueError, match="Invalid bbox"):
|
||||
read_bounds(naturalearth_lowres, bbox=bbox)
|
||||
|
||||
|
||||
def test_read_bounds_bbox(naturalearth_lowres_all_ext):
|
||||
# should return no features
|
||||
fids, bounds = read_bounds(
|
||||
naturalearth_lowres_all_ext, bbox=(0, 0, 0.00001, 0.00001)
|
||||
)
|
||||
|
||||
assert fids.shape == (0,)
|
||||
assert bounds.shape == (4, 0)
|
||||
|
||||
fids, bounds = read_bounds(naturalearth_lowres_all_ext, bbox=(-85, 8, -80, 10))
|
||||
|
||||
assert fids.shape == (2,)
|
||||
if naturalearth_lowres_all_ext.suffix == ".gpkg":
|
||||
# fid in gpkg is 1-based
|
||||
assert array_equal(fids, [34, 35]) # PAN, CRI
|
||||
else:
|
||||
# fid in other formats is 0-based
|
||||
assert array_equal(fids, [33, 34]) # PAN, CRI
|
||||
|
||||
assert bounds.shape == (4, 2)
|
||||
assert allclose(
|
||||
bounds.T,
|
||||
[
|
||||
[-82.96578305, 7.22054149, -77.24256649, 9.61161001],
|
||||
[-85.94172543, 8.22502798, -82.54619626, 11.21711925],
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"mask",
|
||||
[
|
||||
{"type": "Point", "coordinates": [0, 0]},
|
||||
'{"type": "Point", "coordinates": [0, 0]}',
|
||||
"invalid",
|
||||
],
|
||||
)
|
||||
def test_read_bounds_mask_invalid(naturalearth_lowres, mask):
|
||||
with pytest.raises(ValueError, match="'mask' parameter must be a Shapely geometry"):
|
||||
read_bounds(naturalearth_lowres, mask=mask)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
|
||||
)
|
||||
def test_read_bounds_bbox_mask_invalid(naturalearth_lowres):
|
||||
with pytest.raises(ValueError, match="cannot set both 'bbox' and 'mask'"):
|
||||
read_bounds(
|
||||
naturalearth_lowres, bbox=(-85, 8, -80, 10), mask=shapely.Point(-105, 55)
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"mask,expected",
|
||||
[
|
||||
("POINT (-105 55)", [3]),
|
||||
("POLYGON ((-80 8, -80 10, -85 10, -85 8, -80 8))", [33, 34]),
|
||||
(
|
||||
"""POLYGON ((
|
||||
6.101929 50.97085,
|
||||
5.773002 50.906611,
|
||||
5.593156 50.642649,
|
||||
6.059271 50.686052,
|
||||
6.374064 50.851481,
|
||||
6.101929 50.97085
|
||||
))""",
|
||||
[121, 129, 130],
|
||||
),
|
||||
(
|
||||
"""GEOMETRYCOLLECTION (
|
||||
POINT (-7.7 53),
|
||||
POLYGON ((-80 8, -80 10, -85 10, -85 8, -80 8))
|
||||
)""",
|
||||
[33, 34, 133],
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_read_bounds_mask(naturalearth_lowres_all_ext, mask, expected):
|
||||
mask = shapely.from_wkt(mask)
|
||||
|
||||
fids = read_bounds(naturalearth_lowres_all_ext, mask=mask)[0]
|
||||
|
||||
if naturalearth_lowres_all_ext.suffix == ".gpkg":
|
||||
# fid in gpkg is 1-based
|
||||
assert array_equal(fids, np.array(expected) + 1)
|
||||
else:
|
||||
# fid in other formats is 0-based
|
||||
assert array_equal(fids, expected)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    __gdal_version__ < (3, 4, 0),
    reason="Cannot determine if GEOS is present or absent for GDAL < 3.4",
)
def test_read_bounds_bbox_intersects_vs_envelope_overlaps(naturalearth_lowres_all_ext):
    """bbox filter semantics depend on whether GDAL was built with GEOS.

    With GEOS, the filter keeps features whose geometry intersects the bbox;
    without it, any feature whose bounding box overlaps the bbox is kept.
    """
    fids, _ = read_bounds(naturalearth_lowres_all_ext, bbox=(-140, 20, -100, 45))

    if __gdal_geos_version__ is None:
        # envelope-only test: CAN and RUS bboxes overlap the filter bbox even
        # though their geometries do not intersect it
        expected = [3, 4, 18, 27]  # CAN, USA, RUS, MEX (0-based)
    else:
        expected = [4, 27]  # USA, MEX (0-based)

    assert fids.shape == (len(expected),)

    # fid is 1-based in GPKG and 0-based in the other formats
    offset = 1 if naturalearth_lowres_all_ext.suffix == ".gpkg" else 0
    assert array_equal(fids, np.array(expected) + offset)
|
||||
|
||||
|
||||
def test_read_info(naturalearth_lowres):
    """read_info reports metadata and driver capabilities for a shapefile."""
    info = read_info(naturalearth_lowres)

    assert info["crs"] == "EPSG:4326"
    assert info["geometry_type"] == "Polygon"
    assert info["encoding"] == "UTF-8"
    assert info["fields"].shape == (5,)
    assert info["dtypes"].tolist() == ["int64", "object", "object", "object", "float64"]
    assert info["features"] == 177
    assert allclose(info["total_bounds"], (-180, -90, 180, 83.64513))
    assert info["driver"] == "ESRI Shapefile"

    # capability flags reported for the ESRI Shapefile driver
    expected_capabilities = {
        "random_read": True,
        "fast_set_next_by_index": True,
        "fast_spatial_filter": False,
        "fast_feature_count": True,
        "fast_total_bounds": True,
    }
    for capability, value in expected_capabilities.items():
        assert info["capabilities"][capability] is value
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dataset_kwargs,fields",
    [
        ({}, ["top_level", "intermediate_level"]),
        # the open option is case-insensitive and accepts a bool value
        (
            {"FLATTEN_NESTED_ATTRIBUTES": "YES"},
            ["top_level", "intermediate_level_bottom_level"],
        ),
        (
            {"flatten_nested_attributes": "yes"},
            ["top_level", "intermediate_level_bottom_level"],
        ),
        (
            {"flatten_nested_attributes": True},
            ["top_level", "intermediate_level_bottom_level"],
        ),
    ],
)
def test_read_info_dataset_kwargs(data_dir, dataset_kwargs, fields):
    """Dataset open options passed to read_info are forwarded to GDAL."""
    info = read_info(data_dir / "test_nested.geojson", **dataset_kwargs)
    assert info["fields"].tolist() == fields
|
||||
|
||||
|
||||
def test_read_info_invalid_dataset_kwargs(naturalearth_lowres):
    """An unsupported open option produces a RuntimeWarning, not an error."""
    match = "does not support open option INVALID"
    with pytest.warns(RuntimeWarning, match=match):
        read_info(naturalearth_lowres, INVALID="YES")
|
||||
|
||||
|
||||
def test_read_info_force_feature_count_exception(data_dir):
    """Forcing a feature count on this OSM layer raises DataLayerError."""
    with pytest.raises(DataLayerError, match="Could not iterate over features"):
        read_info(data_dir / "sample.osm.pbf", layer="lines", force_feature_count=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "layer, force, expected",
    [
        ("points", False, -1),
        ("points", True, 8),
        ("lines", False, -1),
        ("lines", True, 36),
    ],
)
def test_read_info_force_feature_count(data_dir, layer, force, expected):
    """Feature count is -1 unless force_feature_count triggers iteration.

    USE_CUSTOM_INDEXING is disabled because the sample OSM file has
    non-increasing node IDs, which causes the default custom indexing to
    raise an exception while iterating over features.
    """
    info = read_info(
        data_dir / "sample.osm.pbf",
        layer=layer,
        force_feature_count=force,
        USE_CUSTOM_INDEXING=False,
    )
    assert info["features"] == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "force_total_bounds, expected_total_bounds",
    [(True, (-180.0, -90.0, 180.0, 83.64513)), (False, None)],
)
def test_read_info_force_total_bounds(
    tmpdir, naturalearth_lowres, force_total_bounds, expected_total_bounds
):
    """total_bounds for GeoJSON is only computed when explicitly forced.

    GeoJSON files don't have a fast way to determine total_bounds, so
    read_info reports None unless force_total_bounds is set.
    """
    geojson_path = prepare_testfile(naturalearth_lowres, dst_dir=tmpdir, ext=".geojson")
    info = read_info(geojson_path, force_total_bounds=force_total_bounds)

    if expected_total_bounds is None:
        assert info["total_bounds"] is None
    else:
        assert allclose(info["total_bounds"], expected_total_bounds)
|
||||
|
||||
|
||||
def test_read_info_without_geometry(test_fgdb_vsi):
    """A layer without geometry reports total_bounds of None."""
    info = read_info(test_fgdb_vsi)
    assert info["total_bounds"] is None
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "name,value,expected",
    [
        ("CPL_DEBUG", "ON", True),
        ("CPL_DEBUG", True, True),
        ("CPL_DEBUG", "OFF", False),
        ("CPL_DEBUG", False, False),
    ],
)
def test_set_config_options(name, value, expected):
    """String and boolean config values round-trip through GDAL as booleans."""
    set_gdal_config_options({name: value})
    assert get_gdal_config_option(name) == expected
|
||||
|
||||
|
||||
def test_reset_config_options():
    """Setting a config option to None clears a previously set value."""
    set_gdal_config_options({"foo": "bar"})
    assert get_gdal_config_option("foo") == "bar"

    # None removes the option entirely
    set_gdal_config_options({"foo": None})
    assert get_gdal_config_option("foo") is None
|
||||
|
||||
|
||||
def test_error_handling(capfd):
    """A GDAL failure becomes a Python exception and is not printed.

    Triggers a GDAL Failure and checks that it is translated into a
    DataSourceError instead of being written to stderr.
    """
    with pytest.raises(DataSourceError, match="No such file or directory"):
        read_info("non-existent.shp")

    assert capfd.readouterr().err == ""
|
||||
|
||||
|
||||
def test_error_handling_warning(capfd, naturalearth_lowres):
    """A GDAL warning becomes a Python warning and is not printed.

    Triggers a GDAL Warning and checks that it is translated into a
    RuntimeWarning instead of being written to stderr.
    """
    with pytest.warns(RuntimeWarning, match="does not support open option INVALID"):
        read_info(naturalearth_lowres, INVALID="YES")

    assert capfd.readouterr().err == ""
|
||||
File diff suppressed because it is too large
Load Diff
332
.venv/lib/python3.12/site-packages/pyogrio/tests/test_path.py
Normal file
332
.venv/lib/python3.12/site-packages/pyogrio/tests/test_path.py
Normal file
@@ -0,0 +1,332 @@
|
||||
import os
|
||||
import contextlib
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
|
||||
import pytest
|
||||
|
||||
import pyogrio
|
||||
import pyogrio.raw
|
||||
from pyogrio.util import vsi_path
|
||||
|
||||
# Optional dependency: several tests below are skipped when GeoPandas is
# not installed.
try:
    import geopandas  # NOQA
except ImportError:
    has_geopandas = False
else:
    has_geopandas = True
|
||||
|
||||
|
||||
@contextlib.contextmanager
def change_cwd(path):
    """Temporarily change the working directory to *path*.

    The previous working directory is restored even if the body raises.
    """
    original = os.getcwd()
    os.chdir(str(path))
    try:
        yield
    finally:
        os.chdir(original)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "path, expected",
    [
        # local file paths that should be passed through as is
        ("data.gpkg", "data.gpkg"),
        ("/home/user/data.gpkg", "/home/user/data.gpkg"),
        (r"C:\User\Documents\data.gpkg", r"C:\User\Documents\data.gpkg"),
        ("file:///home/user/data.gpkg", "/home/user/data.gpkg"),
        # cloud URIs
        ("https://testing/data.gpkg", "/vsicurl/https://testing/data.gpkg"),
        ("s3://testing/data.gpkg", "/vsis3/testing/data.gpkg"),
        ("gs://testing/data.gpkg", "/vsigs/testing/data.gpkg"),
        ("az://testing/data.gpkg", "/vsiaz/testing/data.gpkg"),
        ("adl://testing/data.gpkg", "/vsiadls/testing/data.gpkg"),
        ("adls://testing/data.gpkg", "/vsiadls/testing/data.gpkg"),
        ("hdfs://testing/data.gpkg", "/vsihdfs/testing/data.gpkg"),
        ("webhdfs://testing/data.gpkg", "/vsiwebhdfs/testing/data.gpkg"),
        # archives
        ("zip://data.zip", "/vsizip/data.zip"),
        ("tar://data.tar", "/vsitar/data.tar"),
        ("gzip://data.gz", "/vsigzip/data.gz"),
        ("tar://./my.tar!my.geojson", "/vsitar/./my.tar/my.geojson"),
        (
            "zip://home/data/shapefile.zip!layer.shp",
            "/vsizip/home/data/shapefile.zip/layer.shp",
        ),
        # combined schemes
        ("zip+s3://testing/shapefile.zip", "/vsizip/vsis3/testing/shapefile.zip"),
        (
            "zip+https://s3.amazonaws.com/testing/shapefile.zip",
            "/vsizip/vsicurl/https://s3.amazonaws.com/testing/shapefile.zip",
        ),
        # auto-prefix zip files
        ("test.zip", "/vsizip/test.zip"),
        ("/a/b/test.zip", "/vsizip//a/b/test.zip"),
        ("a/b/test.zip", "/vsizip/a/b/test.zip"),
        # archives using ! notation should be prefixed by vsizip
        ("test.zip!item.shp", "/vsizip/test.zip/item.shp"),
        ("test.zip!/a/b/item.shp", "/vsizip/test.zip/a/b/item.shp"),
        ("test.zip!a/b/item.shp", "/vsizip/test.zip/a/b/item.shp"),
        ("/vsizip/test.zip/a/b/item.shp", "/vsizip/test.zip/a/b/item.shp"),
        ("zip:///test.zip/a/b/item.shp", "/vsizip//test.zip/a/b/item.shp"),
        # auto-prefix remote zip files
        (
            "https://s3.amazonaws.com/testing/test.zip",
            "/vsizip/vsicurl/https://s3.amazonaws.com/testing/test.zip",
        ),
        (
            "https://s3.amazonaws.com/testing/test.zip!/a/b/item.shp",
            "/vsizip/vsicurl/https://s3.amazonaws.com/testing/test.zip/a/b/item.shp",
        ),
        ("s3://testing/test.zip", "/vsizip/vsis3/testing/test.zip"),
        (
            "s3://testing/test.zip!a/b/item.shp",
            "/vsizip/vsis3/testing/test.zip/a/b/item.shp",
        ),
    ],
)
def test_vsi_path(path, expected):
    """Each supported URI form is translated to its GDAL VSI path."""
    assert vsi_path(path) == expected
|
||||
|
||||
|
||||
def test_vsi_path_unknown():
    """A URI with an unrecognized scheme is passed through unchanged."""
    unknown = "s4://test/data.geojson"
    assert vsi_path(unknown) == unknown
|
||||
|
||||
|
||||
def test_vsi_handling_read_functions(naturalearth_lowres_vsi):
    """All raw read entry points translate zip:// paths.

    A zip:// path would fail without the shared path handling.
    """
    path, _ = naturalearth_lowres_vsi
    zip_path = "zip://" + str(path)

    assert len(pyogrio.raw.read(zip_path)[2]) == 177
    assert pyogrio.read_info(zip_path)["features"] == 177
    assert len(pyogrio.read_bounds(zip_path)[0]) == 177
|
||||
|
||||
|
||||
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_vsi_handling_read_dataframe(naturalearth_lowres_vsi):
    """read_dataframe also accepts zip:// paths."""
    path, _ = naturalearth_lowres_vsi
    zip_path = "zip://" + str(path)

    assert len(pyogrio.read_dataframe(zip_path)) == 177
|
||||
|
||||
|
||||
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_path_absolute(data_dir):
    """Absolute paths are accepted both as pathlib.Path and as str."""
    path = data_dir / "naturalearth_lowres/naturalearth_lowres.shp"

    for candidate in (path, str(path)):
        assert len(pyogrio.read_dataframe(candidate)) == 177
|
||||
|
||||
|
||||
def test_path_relative(data_dir):
    """Relative paths work for all raw read entry points."""
    path = "naturalearth_lowres/naturalearth_lowres.shp"

    with change_cwd(data_dir):
        assert len(pyogrio.raw.read(path)[2]) == 177
        assert pyogrio.read_info(path)["features"] == 177
        assert len(pyogrio.read_bounds(path)[0]) == 177
|
||||
|
||||
|
||||
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_path_relative_dataframe(data_dir):
    """read_dataframe resolves paths relative to the working directory."""
    with change_cwd(data_dir):
        frame = pyogrio.read_dataframe("naturalearth_lowres/naturalearth_lowres.shp")
        assert len(frame) == 177
|
||||
|
||||
|
||||
def test_uri_local_file(data_dir):
    """file:// URIs are handled by all raw read entry points."""
    path = "file://" + str(data_dir / "naturalearth_lowres/naturalearth_lowres.shp")

    assert len(pyogrio.raw.read(path)[2]) == 177
    assert pyogrio.read_info(path)["features"] == 177
    assert len(pyogrio.read_bounds(path)[0]) == 177
|
||||
|
||||
|
||||
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_uri_local_file_dataframe(data_dir):
    """read_dataframe accepts file:// URIs."""
    uri = "file://" + str(data_dir / "naturalearth_lowres/naturalearth_lowres.shp")
    assert len(pyogrio.read_dataframe(uri)) == 177
|
||||
|
||||
|
||||
def test_zip_path(naturalearth_lowres_vsi):
    """zip:// and /vsizip/ paths work for raw reads, absolute and relative."""
    path, path_vsi = naturalearth_lowres_vsi
    path_zip = "zip://" + str(path)

    def check(p):
        # every raw read entry point should see all 177 features
        assert len(pyogrio.raw.read(p)[2]) == 177
        assert pyogrio.read_info(p)["features"] == 177
        assert len(pyogrio.read_bounds(p)[0]) == 177

    # absolute zip path
    check(path_zip)

    # absolute vsizip path
    check(path_vsi)

    # relative zip path
    with change_cwd(path.parent):
        check("zip://" + path.name)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_zip_path_dataframe(naturalearth_lowres_vsi):
    """read_dataframe accepts absolute zip://, /vsizip/, and relative zip:// paths."""
    path, path_vsi = naturalearth_lowres_vsi

    # absolute zip path
    assert len(pyogrio.read_dataframe("zip://" + str(path))) == 177

    # absolute vsizip path
    assert len(pyogrio.read_dataframe(path_vsi)) == 177

    # relative zip path
    with change_cwd(path.parent):
        assert len(pyogrio.read_dataframe("zip://" + path.name)) == 177
|
||||
|
||||
|
||||
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_detect_zip_path(tmp_path, naturalearth_lowres):
    """Zip archives are auto-detected and members addressable via "!".

    Builds a zip containing one shapefile at the archive root (test1) and a
    second one nested under /a/b (test2), then reads each back through every
    supported path form.
    """
    # create a zipfile with 2 shapefiles in a set of subdirectories
    df = pyogrio.read_dataframe(naturalearth_lowres, where="iso_a3 in ('CAN', 'PER')")
    pyogrio.write_dataframe(df.loc[df.iso_a3 == "CAN"], tmp_path / "test1.shp")
    pyogrio.write_dataframe(df.loc[df.iso_a3 == "PER"], tmp_path / "test2.shp")

    path = tmp_path / "test.zip"
    with ZipFile(path, mode="w", compression=ZIP_DEFLATED, compresslevel=5) as out:
        for ext in ["dbf", "prj", "shp", "shx"]:
            filename = f"test1.{ext}"
            out.write(tmp_path / filename, filename)

            filename = f"test2.{ext}"
            # FIX: the arcname must include each component's filename; writing
            # every component to the same fixed arcname would overwrite the
            # members and make /a/b/test2.shp unreadable below
            out.write(tmp_path / filename, f"/a/b/{filename}")

    # defaults to the first shapefile found, at lowest subdirectory
    df = pyogrio.read_dataframe(path)
    assert df.iso_a3[0] == "CAN"

    # selecting a shapefile from within the zip requires the "!" archive specifier
    df = pyogrio.read_dataframe(f"{path}!test1.shp")
    assert df.iso_a3[0] == "CAN"

    df = pyogrio.read_dataframe(f"{path}!/a/b/test2.shp")
    assert df.iso_a3[0] == "PER"

    # specifying zip:// scheme should also work
    df = pyogrio.read_dataframe(f"zip://{path}!/a/b/test2.shp")
    assert df.iso_a3[0] == "PER"

    # specifying /vsizip/ should also work but path must already be in GDAL ready
    # format without the "!" archive specifier
    df = pyogrio.read_dataframe(f"/vsizip/{path}/a/b/test2.shp")
    assert df.iso_a3[0] == "PER"
|
||||
|
||||
|
||||
@pytest.mark.network
def test_url():
    """Remote shapefiles over HTTPS are readable by all raw entry points."""
    url = "https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp"  # NOQA

    assert len(pyogrio.raw.read(url)[2]) == 177
    assert pyogrio.read_info(url)["features"] == 177
    assert len(pyogrio.read_bounds(url)[0]) == 177
|
||||
|
||||
|
||||
# FIX: this test downloads over HTTPS, so it must carry the network mark like
# its siblings (test_url, test_url_with_zip_dataframe, test_uri_s3_dataframe);
# without it the test still runs when network tests are deselected.
@pytest.mark.network
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_url_dataframe():
    """read_dataframe reads a remote shapefile over HTTPS."""
    url = "https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp"  # NOQA

    assert len(pyogrio.read_dataframe(url)) == 177
|
||||
|
||||
|
||||
@pytest.mark.network
def test_url_with_zip():
    """zip+https:// URLs read the shapefile inside a remote zip archive."""
    url = "zip+https://s3.amazonaws.com/fiona-testing/coutwildrnp.zip"

    assert len(pyogrio.raw.read(url)[2]) == 67
    assert pyogrio.read_info(url)["features"] == 67
    assert len(pyogrio.read_bounds(url)[0]) == 67
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_url_with_zip_dataframe():
    """read_dataframe reads from a remote zip archive via zip+https://."""
    url = "zip+https://s3.amazonaws.com/fiona-testing/coutwildrnp.zip"
    assert len(pyogrio.read_dataframe(url)) == 67
|
||||
|
||||
|
||||
@pytest.fixture
def aws_env_setup(monkeypatch):
    """Allow anonymous (unsigned) requests against public S3 buckets."""
    monkeypatch.setenv("AWS_NO_SIGN_REQUEST", "YES")
|
||||
|
||||
|
||||
@pytest.mark.network
def test_uri_s3(aws_env_setup):
    """zip+s3:// URIs are readable by all raw entry points."""
    url = "zip+s3://fiona-testing/coutwildrnp.zip"

    assert len(pyogrio.raw.read(url)[2]) == 67
    assert pyogrio.read_info(url)["features"] == 67
    assert len(pyogrio.read_bounds(url)[0]) == 67
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_uri_s3_dataframe(aws_env_setup):
    """read_dataframe reads from a public S3 bucket via zip+s3://."""
    frame = pyogrio.read_dataframe("zip+s3://fiona-testing/coutwildrnp.zip")
    assert len(frame) == 67
|
||||
1178
.venv/lib/python3.12/site-packages/pyogrio/tests/test_raw_io.py
Normal file
1178
.venv/lib/python3.12/site-packages/pyogrio/tests/test_raw_io.py
Normal file
File diff suppressed because it is too large
Load Diff
86
.venv/lib/python3.12/site-packages/pyogrio/tests/win32.py
Normal file
86
.venv/lib/python3.12/site-packages/pyogrio/tests/win32.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""Run pytest tests manually on Windows due to import errors
|
||||
"""
|
||||
from pathlib import Path
|
||||
import platform
|
||||
from tempfile import TemporaryDirectory
|
||||
|
||||
|
||||
data_dir = Path(__file__).parent.resolve() / "fixtures"
|
||||
|
||||
if platform.system() == "Windows":
    naturalearth_lowres = data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp")
    test_fgdb_vsi = f"/vsizip/{data_dir}/test_fgdb.gdb.zip"

    from pyogrio.tests.test_core import test_read_info
    from pyogrio.tests.test_raw_io import (
        test_read,
        test_read_no_geometry,
        test_read_columns,
        test_read_skip_features,
        test_read_max_features,
        test_read_where,
        test_read_where_invalid,
        test_write,
        test_write_gpkg,
        test_write_geojson,
    )

    def _run(test_func, *args):
        """Run one test, printing (not raising) any failure so the rest run."""
        try:
            test_func(*args)
        except Exception as ex:
            print(ex)

    # read tests all take the same fixture path; run in the original order
    for read_test in (
        test_read_info,
        test_read,
        test_read_no_geometry,
        test_read_columns,
        test_read_skip_features,
        test_read_max_features,
        test_read_where,
        test_read_where_invalid,
    ):
        _run(read_test, naturalearth_lowres)

    # write tests each get a fresh temporary directory
    for write_test in (test_write, test_write_gpkg, test_write_geojson):
        with TemporaryDirectory() as tmpdir:
            _run(write_test, tmpdir, naturalearth_lowres)
|
||||
Reference in New Issue
Block a user