that's too much!

This commit is contained in:
2024-12-19 20:22:56 -08:00
parent 0020a609dd
commit 32cd60e92b
8443 changed files with 1446950 additions and 42 deletions

View File

@@ -0,0 +1,135 @@
from pathlib import Path
from zipfile import ZipFile, ZIP_DEFLATED
import pytest
from pyogrio import (
__gdal_version_string__,
__version__,
list_drivers,
)
from pyogrio._compat import HAS_ARROW_API, HAS_GDAL_GEOS, HAS_SHAPELY
from pyogrio.raw import read, write
# directory containing the fixture datasets shipped with the test suite
_data_dir = Path(__file__).parent.resolve() / "fixtures"
# mapping of driver extension to driver name for well-supported drivers
DRIVERS = {
    ".fgb": "FlatGeobuf",
    ".geojson": "GeoJSON",
    ".geojsonl": "GeoJSONSeq",
    ".geojsons": "GeoJSONSeq",
    ".gpkg": "GPKG",
    ".shp": "ESRI Shapefile",
}
# mapping of driver name to extension
# NOTE: ".geojsonl" and ".geojsons" both map to GeoJSONSeq, so only the last
# one encountered survives this inversion
DRIVER_EXT = {driver: ext for ext, driver in DRIVERS.items()}
# extensions exercised by the parametrized fixtures (".geojsons" is excluded)
ALL_EXTS = [".fgb", ".geojson", ".geojsonl", ".gpkg", ".shp"]
def pytest_report_header(config):
    """Add pyogrio/GDAL versions and available drivers to the pytest header."""
    driver_parts = [
        f"{name}({capability})"
        for name, capability in sorted(list_drivers().items())
    ]
    header_lines = [
        f"pyogrio {__version__}",
        f"GDAL {__gdal_version_string__}",
        f"Supported drivers: {', '.join(driver_parts)}",
    ]
    return "\n".join(header_lines)
# marks to skip tests if optional dependencies are not present
requires_arrow_api = pytest.mark.skipif(
    not HAS_ARROW_API, reason="GDAL>=3.6 and pyarrow required"
)
requires_gdal_geos = pytest.mark.skipif(
    not HAS_GDAL_GEOS, reason="GDAL compiled with GEOS required"
)
requires_shapely = pytest.mark.skipif(not HAS_SHAPELY, reason="Shapely >= 2.0 required")
def prepare_testfile(testfile_path, dst_dir, ext):
    """Return a copy of *testfile_path* converted to format *ext* in *dst_dir*.

    Returns the original path unchanged when it already has the requested
    extension; otherwise reads the dataset and writes ``<stem><ext>`` into
    *dst_dir*, reusing an existing converted copy if present.
    """
    if ext == testfile_path.suffix:
        return testfile_path
    dst_path = dst_dir / f"{testfile_path.stem}{ext}"
    # converted copies are cached in dst_dir between calls
    if dst_path.exists():
        return dst_path
    meta, _, geometry, field_data = read(testfile_path)
    if ext == ".fgb":
        # For .fgb, spatial_index=False to avoid the rows being reordered
        meta["spatial_index"] = False
        # allow mixed Polygons/MultiPolygons type
        meta["geometry_type"] = "Unknown"
    elif ext == ".gpkg":
        # For .gpkg, spatial_index=False to avoid the rows being reordered
        meta["spatial_index"] = False
        meta["geometry_type"] = "MultiPolygon"
    write(dst_path, geometry, field_data, **meta)
    return dst_path
@pytest.fixture(scope="session")
def data_dir():
    """Path to the directory containing the test fixture datasets."""
    return _data_dir
@pytest.fixture(scope="function")
def naturalearth_lowres(tmp_path, request):
    """naturalearth_lowres dataset converted to the extension provided by
    indirect parametrization (defaults to the original .shp)."""
    ext = getattr(request, "param", ".shp")
    testfile_path = _data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp")
    return prepare_testfile(testfile_path, tmp_path, ext)
@pytest.fixture(scope="function", params=ALL_EXTS)
def naturalearth_lowres_all_ext(tmp_path, naturalearth_lowres, request):
    """naturalearth_lowres dataset converted to each extension in ALL_EXTS."""
    return prepare_testfile(naturalearth_lowres, tmp_path, request.param)
@pytest.fixture(scope="function")
def naturalearth_lowres_vsi(tmp_path, naturalearth_lowres):
    """Wrap naturalearth_lowres as a zip file for vsi tests.

    Returns a tuple of (zip file path, /vsizip/ URI to the shapefile inside).
    """
    path = tmp_path / f"{naturalearth_lowres.name}.zip"
    with ZipFile(path, mode="w", compression=ZIP_DEFLATED, compresslevel=5) as out:
        # a shapefile consists of several sidecar files; archive all of them
        for ext in ["dbf", "prj", "shp", "shx"]:
            filename = f"{naturalearth_lowres.stem}.{ext}"
            out.write(naturalearth_lowres.parent / filename, filename)
    return path, f"/vsizip/{path}/{naturalearth_lowres.name}"
@pytest.fixture(scope="session")
def test_fgdb_vsi():
    """/vsizip/ URI to the zipped FileGDB test dataset."""
    return f"/vsizip/{_data_dir}/test_fgdb.gdb.zip"
@pytest.fixture(scope="session")
def test_gpkg_nulls():
    """Path to the GPKG fixture containing a row of null values."""
    return _data_dir / "test_gpkg_nulls.gpkg"
@pytest.fixture(scope="session")
def test_ogr_types_list():
    """Path to the GeoJSON fixture containing list-typed properties."""
    return _data_dir / "test_ogr_types_list.geojson"
@pytest.fixture(scope="session")
def test_datetime():
    """Path to the GeoJSON fixture containing naive datetime values."""
    return _data_dir / "test_datetime.geojson"
@pytest.fixture(scope="session")
def test_datetime_tz():
    """Path to the GeoJSON fixture containing timezone-aware datetimes."""
    return _data_dir / "test_datetime_tz.geojson"

View File

@@ -0,0 +1,89 @@
# Test datasets
## Natural Earth lowres
`naturalearth_lowres.shp` was copied from GeoPandas.
## FGDB test dataset
`test_fgdb.gdb.zip`
Downloaded from http://trac.osgeo.org/gdal/raw-attachment/wiki/FileGDB/test_fgdb.gdb.zip
### GPKG test dataset with null values
`test_gpkg_nulls.gpkg` was created using Fiona backend to GeoPandas:
```
from collections import OrderedDict
import fiona
import geopandas as gp
import numpy as np
from pyogrio import write_dataframe
filename = "test_gpkg_nulls.gpkg"
df = gp.GeoDataFrame(
{
"col_bool": np.array([True, False, True], dtype="bool"),
"col_int8": np.array([1, 2, 3], dtype="int8"),
"col_int16": np.array([1, 2, 3], dtype="int16"),
"col_int32": np.array([1, 2, 3], dtype="int32"),
"col_int64": np.array([1, 2, 3], dtype="int64"),
"col_uint8": np.array([1, 2, 3], dtype="uint8"),
"col_uint16": np.array([1, 2, 3], dtype="uint16"),
"col_uint32": np.array([1, 2, 3], dtype="uint32"),
"col_uint64": np.array([1, 2, 3], dtype="uint64"),
"col_float32": np.array([1.5, 2.5, 3.5], dtype="float32"),
"col_float64": np.array([1.5, 2.5, 3.5], dtype="float64"),
},
geometry=gp.points_from_xy([0, 1, 2], [0, 1, 2]),
crs="EPSG:4326",
)
write_dataframe(df, filename)
# construct row with null values
# Note: np.nan can only be used for float values
null_row = {
"type": "Feature",
"id": 4,
"properties": OrderedDict(
[
("col_bool", None),
("col_int8", None),
("col_int16", None),
("col_int32", None),
("col_int64", None),
("col_uint8", None),
("col_uint16", None),
("col_uint32", None),
("col_uint64", None),
("col_float32", np.nan),
("col_float64", np.nan),
]
),
"geometry": {"type": "Point", "coordinates": (4.0, 4.0)},
}
# append row with nulls to GPKG
with fiona.open(filename, "a") as c:
c.write(null_row)
```
NOTE: Reading boolean values into GeoPandas using Fiona backend treats those
values as `None` and column dtype as `object`; Pyogrio treats those values as
`np.nan` and column dtype as `float64`.
### GPKG test with MultiSurface
This was extracted from https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_0308_HU4_GDB.zip
`NHDWaterbody` layer using ogr2ogr:
```bash
ogr2ogr test_mixed_surface.gpkg NHDPLUS_H_0308_HU4_GDB.gdb NHDWaterbody -where '"NHDPlusID" = 15000300070477' -select "NHDPlusID"
```
### OSM PBF test
This was downloaded from https://github.com/openstreetmap/OSM-binary/blob/master/resources/sample.pbf

View File

@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]

View File

@@ -0,0 +1,7 @@
{
"type": "FeatureCollection",
"features": [
{ "type": "Feature", "properties": { "col": "2020-01-01T09:00:00.123" }, "geometry": { "type": "Point", "coordinates": [ 1.0, 1.0 ] } },
{ "type": "Feature", "properties": { "col": "2020-01-01T10:00:00" }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } }
]
}

View File

@@ -0,0 +1,8 @@
{
"type": "FeatureCollection",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": { "datetime_col": "2020-01-01T09:00:00.123-05:00" }, "geometry": { "type": "Point", "coordinates": [ 1.0, 1.0 ] } },
{ "type": "Feature", "properties": { "datetime_col": "2020-01-01T10:00:00-05:00" }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } }
]
}

View File

@@ -0,0 +1,18 @@
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [0, 0]
},
"properties": {
"top_level": "A",
"intermediate_level": {
"bottom_level": "B"
}
}
}
]
}

View File

@@ -0,0 +1,12 @@
{
"type": "FeatureCollection",
"name": "test",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": { "int64": 1, "list_int64": [ 0, 1 ] }, "geometry": { "type": "Point", "coordinates": [ 0.0, 2.0 ] } },
{ "type": "Feature", "properties": { "int64": 2, "list_int64": [ 2, 3 ] }, "geometry": { "type": "Point", "coordinates": [ 1.0, 2.0 ] } },
{ "type": "Feature", "properties": { "int64": 3, "list_int64": [ 4, 5 ] }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } },
{ "type": "Feature", "properties": { "int64": 4, "list_int64": [ 6, 7 ] }, "geometry": { "type": "Point", "coordinates": [ 3.0, 2.0 ] } },
{ "type": "Feature", "properties": { "int64": 5, "list_int64": [ 8, 9 ] }, "geometry": { "type": "Point", "coordinates": [ 4.0, 2.0 ] } }
]
}

View File

@@ -0,0 +1,207 @@
import contextlib
import math
import os
import pytest
from pyogrio import __gdal_version__, read_dataframe
from pyogrio.raw import open_arrow, read_arrow
from pyogrio.tests.conftest import requires_arrow_api
try:
import pandas as pd
from pandas.testing import assert_frame_equal, assert_index_equal
from geopandas.testing import assert_geodataframe_equal
import pyarrow
except ImportError:
pass
# skip all tests in this file if Arrow API or GeoPandas are unavailable
pytestmark = requires_arrow_api
pytest.importorskip("geopandas")
def test_read_arrow(naturalearth_lowres_all_ext):
    """Arrow-based reads should match the non-Arrow code path."""
    result = read_dataframe(naturalearth_lowres_all_ext, use_arrow=True)
    expected = read_dataframe(naturalearth_lowres_all_ext, use_arrow=False)
    # GeoJSON-based formats round-trip coordinates with lower precision
    is_geojson = naturalearth_lowres_all_ext.suffix.startswith(".geojson")
    assert_geodataframe_equal(result, expected, check_less_precise=is_geojson)
@pytest.mark.parametrize("skip_features, expected", [(10, 167), (200, 0)])
def test_read_arrow_skip_features(naturalearth_lowres, skip_features, expected):
table = read_arrow(naturalearth_lowres, skip_features=skip_features)[1]
assert len(table) == expected
def test_read_arrow_negative_skip_features(naturalearth_lowres):
with pytest.raises(ValueError, match="'skip_features' must be >= 0"):
read_arrow(naturalearth_lowres, skip_features=-1)
@pytest.mark.parametrize(
"max_features, expected", [(0, 0), (10, 10), (200, 177), (100000, 177)]
)
def test_read_arrow_max_features(naturalearth_lowres, max_features, expected):
table = read_arrow(naturalearth_lowres, max_features=max_features)[1]
assert len(table) == expected
def test_read_arrow_negative_max_features(naturalearth_lowres):
with pytest.raises(ValueError, match="'max_features' must be >= 0"):
read_arrow(naturalearth_lowres, max_features=-1)
@pytest.mark.parametrize(
"skip_features, max_features, expected",
[
(0, 0, 0),
(10, 0, 0),
(200, 0, 0),
(1, 200, 176),
(176, 10, 1),
(100, 100, 77),
(100, 100000, 77),
],
)
def test_read_arrow_skip_features_max_features(
naturalearth_lowres, skip_features, max_features, expected
):
table = read_arrow(
naturalearth_lowres, skip_features=skip_features, max_features=max_features
)[1]
assert len(table) == expected
def test_read_arrow_fid(naturalearth_lowres_all_ext):
kwargs = {"use_arrow": True, "where": "fid >= 2 AND fid <= 3"}
df = read_dataframe(naturalearth_lowres_all_ext, fid_as_index=False, **kwargs)
assert_index_equal(df.index, pd.RangeIndex(0, 2))
df = read_dataframe(naturalearth_lowres_all_ext, fid_as_index=True, **kwargs)
assert_index_equal(df.index, pd.Index([2, 3], name="fid"))
def test_read_arrow_columns(naturalearth_lowres):
result = read_dataframe(naturalearth_lowres, use_arrow=True, columns=["continent"])
assert result.columns.tolist() == ["continent", "geometry"]
def test_read_arrow_ignore_geometry(naturalearth_lowres):
result = read_dataframe(naturalearth_lowres, use_arrow=True, read_geometry=False)
assert type(result) is pd.DataFrame
expected = read_dataframe(naturalearth_lowres, use_arrow=True).drop(
columns=["geometry"]
)
assert_frame_equal(result, expected)
def test_read_arrow_nested_types(test_ogr_types_list):
# with arrow, list types are supported
result = read_dataframe(test_ogr_types_list, use_arrow=True)
assert "list_int64" in result.columns
assert result["list_int64"][0].tolist() == [0, 1]
def test_read_arrow_to_pandas_kwargs(test_fgdb_vsi):
# with arrow, list types are supported
arrow_to_pandas_kwargs = {"strings_to_categorical": True}
result = read_dataframe(
test_fgdb_vsi,
use_arrow=True,
arrow_to_pandas_kwargs=arrow_to_pandas_kwargs,
)
assert "SEGMENT_NAME" in result.columns
assert result["SEGMENT_NAME"].dtype.name == "category"
def test_read_arrow_raw(naturalearth_lowres):
meta, table = read_arrow(naturalearth_lowres)
assert isinstance(meta, dict)
assert isinstance(table, pyarrow.Table)
def test_open_arrow(naturalearth_lowres):
with open_arrow(naturalearth_lowres) as (meta, reader):
assert isinstance(meta, dict)
assert isinstance(reader, pyarrow.RecordBatchReader)
assert isinstance(reader.read_all(), pyarrow.Table)
def test_open_arrow_batch_size(naturalearth_lowres):
    """open_arrow should split the dataset into batches of batch_size rows."""
    _, full_table = read_arrow(naturalearth_lowres)
    # a batch size of half the rows should yield exactly two batches
    batch_size = math.ceil(len(full_table) / 2)
    with open_arrow(naturalearth_lowres, batch_size=batch_size) as (meta, reader):
        assert isinstance(meta, dict)
        assert isinstance(reader, pyarrow.RecordBatchReader)
        batches = [batch for batch in reader]
        assert len(batches) == 2, "Should be two batches given the batch_size parameter"
        assert len(batches[0]) == batch_size, "First table should match the batch size"
@pytest.mark.skipif(
__gdal_version__ >= (3, 8, 0),
reason="skip_features supported by Arrow stream API for GDAL>=3.8.0",
)
@pytest.mark.parametrize("skip_features", [10, 200])
def test_open_arrow_skip_features_unsupported(naturalearth_lowres, skip_features):
"""skip_features are not supported for the Arrow stream interface for
GDAL < 3.8.0"""
with pytest.raises(
ValueError,
match="specifying 'skip_features' is not supported for Arrow for GDAL<3.8.0",
):
with open_arrow(naturalearth_lowres, skip_features=skip_features) as (
meta,
reader,
):
pass
@pytest.mark.parametrize("max_features", [10, 200])
def test_open_arrow_max_features_unsupported(naturalearth_lowres, max_features):
"""max_features are not supported for the Arrow stream interface"""
with pytest.raises(
ValueError,
match="specifying 'max_features' is not supported for Arrow",
):
with open_arrow(naturalearth_lowres, max_features=max_features) as (
meta,
reader,
):
pass
@contextlib.contextmanager
def use_arrow_context():
    """Temporarily set ``PYOGRIO_USE_ARROW=1`` in the environment.

    The previous value (if any) is restored on exit — even when the body
    raises — so a failing test cannot leak the setting into later tests.
    """
    original = os.environ.get("PYOGRIO_USE_ARROW", None)
    os.environ["PYOGRIO_USE_ARROW"] = "1"
    try:
        yield
    finally:
        # compare against None (not truthiness) so a pre-existing empty
        # string is restored rather than deleted
        if original is None:
            os.environ.pop("PYOGRIO_USE_ARROW", None)
        else:
            os.environ["PYOGRIO_USE_ARROW"] = original
def test_enable_with_environment_variable(test_ogr_types_list):
    """PYOGRIO_USE_ARROW=1 should enable the Arrow read path by default."""
    # list types are only supported with arrow, so don't work by default and work
    # when arrow is enabled through env variable
    result = read_dataframe(test_ogr_types_list)
    assert "list_int64" not in result.columns
    with use_arrow_context():
        result = read_dataframe(test_ogr_types_list)
    assert "list_int64" in result.columns

View File

@@ -0,0 +1,496 @@
import numpy as np
from numpy import array_equal, allclose
import pytest
from pyogrio import (
__gdal_version__,
__gdal_geos_version__,
list_drivers,
list_layers,
read_bounds,
read_info,
set_gdal_config_options,
get_gdal_config_option,
get_gdal_data_path,
)
from pyogrio.core import detect_write_driver
from pyogrio.errors import DataSourceError, DataLayerError
from pyogrio.tests.conftest import HAS_SHAPELY, prepare_testfile
from pyogrio._env import GDALEnv
with GDALEnv():
# NOTE: this must be AFTER above imports, which init the GDAL and PROJ data
# search paths
from pyogrio._ogr import ogr_driver_supports_write, has_gdal_data, has_proj_data
try:
import shapely
except ImportError:
pass
def test_gdal_data():
# test will fail if GDAL data files cannot be found, indicating an
# installation error
assert has_gdal_data()
def test_proj_data():
# test will fail if PROJ data files cannot be found, indicating an
# installation error
assert has_proj_data()
def test_get_gdal_data_path():
# test will fail if the function returns None, which means that GDAL
# cannot find data files, indicating an installation error
assert isinstance(get_gdal_data_path(), str)
def test_gdal_geos_version():
assert __gdal_geos_version__ is None or isinstance(__gdal_geos_version__, tuple)
@pytest.mark.parametrize(
"path,expected",
[
("test.shp", "ESRI Shapefile"),
("test.shp.zip", "ESRI Shapefile"),
("test.geojson", "GeoJSON"),
("test.geojsonl", "GeoJSONSeq"),
("test.gpkg", "GPKG"),
pytest.param(
"test.gpkg.zip",
"GPKG",
marks=pytest.mark.skipif(
__gdal_version__ < (3, 7, 0),
reason="writing *.gpkg.zip requires GDAL >= 3.7.0",
),
),
# postgres can be detected by prefix instead of extension
pytest.param(
"PG:dbname=test",
"PostgreSQL",
marks=pytest.mark.skipif(
"PostgreSQL" not in list_drivers(),
reason="PostgreSQL path test requires PostgreSQL driver",
),
),
],
)
def test_detect_write_driver(path, expected):
assert detect_write_driver(path) == expected
@pytest.mark.parametrize(
"path",
[
"test.svg", # only supports read
"test.", # not a valid extension
"test", # no extension or prefix
"test.foo", # not a valid extension
"FOO:test", # not a valid prefix
],
)
def test_detect_write_driver_unsupported(path):
with pytest.raises(ValueError, match="Could not infer driver from path"):
detect_write_driver(path)
@pytest.mark.parametrize("path", ["test.xml", "test.txt"])
def test_detect_write_driver_multiple_unsupported(path):
with pytest.raises(ValueError, match="multiple drivers are available"):
detect_write_driver(path)
@pytest.mark.parametrize(
"driver,expected",
[
# drivers known to be well-supported by pyogrio
("ESRI Shapefile", True),
("GeoJSON", True),
("GeoJSONSeq", True),
("GPKG", True),
# drivers not supported for write by GDAL
("HTTP", False),
("OAPIF", False),
],
)
def test_ogr_driver_supports_write(driver, expected):
assert ogr_driver_supports_write(driver) == expected
def test_list_drivers():
all_drivers = list_drivers()
# verify that the core drivers are present
for name in ("ESRI Shapefile", "GeoJSON", "GeoJSONSeq", "GPKG", "OpenFileGDB"):
assert name in all_drivers
expected_capability = "rw"
if name == "OpenFileGDB" and __gdal_version__ < (3, 6, 0):
expected_capability = "r"
assert all_drivers[name] == expected_capability
drivers = list_drivers(read=True)
expected = {k: v for k, v in all_drivers.items() if v.startswith("r")}
assert len(drivers) == len(expected)
drivers = list_drivers(write=True)
expected = {k: v for k, v in all_drivers.items() if v.endswith("w")}
assert len(drivers) == len(expected)
drivers = list_drivers(read=True, write=True)
expected = {
k: v for k, v in all_drivers.items() if v.startswith("r") and v.endswith("w")
}
assert len(drivers) == len(expected)
def test_list_layers(naturalearth_lowres, naturalearth_lowres_vsi, test_fgdb_vsi):
assert array_equal(
list_layers(naturalearth_lowres), [["naturalearth_lowres", "Polygon"]]
)
assert array_equal(
list_layers(naturalearth_lowres_vsi[1]), [["naturalearth_lowres", "Polygon"]]
)
# Measured 3D is downgraded to plain 3D during read
# Make sure this warning is raised
with pytest.warns(
UserWarning, match=r"Measured \(M\) geometry types are not supported"
):
fgdb_layers = list_layers(test_fgdb_vsi)
# GDAL >= 3.4.0 includes 'another_relationship' layer
assert len(fgdb_layers) >= 7
# Make sure that nonspatial layer has None for geometry
assert array_equal(fgdb_layers[0], ["basetable_2", None])
# Confirm that measured 3D is downgraded to plain 3D during read
assert array_equal(fgdb_layers[3], ["test_lines", "MultiLineString Z"])
assert array_equal(fgdb_layers[6], ["test_areas", "MultiPolygon Z"])
def test_read_bounds(naturalearth_lowres):
fids, bounds = read_bounds(naturalearth_lowres)
assert fids.shape == (177,)
assert bounds.shape == (4, 177)
assert fids[0] == 0
# Fiji; wraps antimeridian
assert allclose(bounds[:, 0], [-180.0, -18.28799, 180.0, -16.02088])
def test_read_bounds_max_features(naturalearth_lowres):
bounds = read_bounds(naturalearth_lowres, max_features=2)[1]
assert bounds.shape == (4, 2)
def test_read_bounds_negative_max_features(naturalearth_lowres):
with pytest.raises(ValueError, match="'max_features' must be >= 0"):
read_bounds(naturalearth_lowres, max_features=-1)
def test_read_bounds_skip_features(naturalearth_lowres):
expected_bounds = read_bounds(naturalearth_lowres, max_features=11)[1][:, 10]
fids, bounds = read_bounds(naturalearth_lowres, skip_features=10)
assert bounds.shape == (4, 167)
assert allclose(bounds[:, 0], expected_bounds)
assert fids[0] == 10
def test_read_bounds_negative_skip_features(naturalearth_lowres):
with pytest.raises(ValueError, match="'skip_features' must be >= 0"):
read_bounds(naturalearth_lowres, skip_features=-1)
def test_read_bounds_where_invalid(naturalearth_lowres_all_ext):
with pytest.raises(ValueError, match="Invalid SQL"):
read_bounds(naturalearth_lowres_all_ext, where="invalid")
def test_read_bounds_where(naturalearth_lowres):
fids, bounds = read_bounds(naturalearth_lowres, where="iso_a3 = 'CAN'")
assert fids.shape == (1,)
assert bounds.shape == (4, 1)
assert fids[0] == 3
assert allclose(bounds[:, 0], [-140.99778, 41.675105, -52.648099, 83.23324])
@pytest.mark.parametrize("bbox", [(1,), (1, 2), (1, 2, 3)])
def test_read_bounds_bbox_invalid(naturalearth_lowres, bbox):
with pytest.raises(ValueError, match="Invalid bbox"):
read_bounds(naturalearth_lowres, bbox=bbox)
def test_read_bounds_bbox(naturalearth_lowres_all_ext):
# should return no features
fids, bounds = read_bounds(
naturalearth_lowres_all_ext, bbox=(0, 0, 0.00001, 0.00001)
)
assert fids.shape == (0,)
assert bounds.shape == (4, 0)
fids, bounds = read_bounds(naturalearth_lowres_all_ext, bbox=(-85, 8, -80, 10))
assert fids.shape == (2,)
if naturalearth_lowres_all_ext.suffix == ".gpkg":
# fid in gpkg is 1-based
assert array_equal(fids, [34, 35]) # PAN, CRI
else:
# fid in other formats is 0-based
assert array_equal(fids, [33, 34]) # PAN, CRI
assert bounds.shape == (4, 2)
assert allclose(
bounds.T,
[
[-82.96578305, 7.22054149, -77.24256649, 9.61161001],
[-85.94172543, 8.22502798, -82.54619626, 11.21711925],
],
)
@pytest.mark.skipif(
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
)
@pytest.mark.parametrize(
"mask",
[
{"type": "Point", "coordinates": [0, 0]},
'{"type": "Point", "coordinates": [0, 0]}',
"invalid",
],
)
def test_read_bounds_mask_invalid(naturalearth_lowres, mask):
with pytest.raises(ValueError, match="'mask' parameter must be a Shapely geometry"):
read_bounds(naturalearth_lowres, mask=mask)
@pytest.mark.skipif(
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
)
def test_read_bounds_bbox_mask_invalid(naturalearth_lowres):
with pytest.raises(ValueError, match="cannot set both 'bbox' and 'mask'"):
read_bounds(
naturalearth_lowres, bbox=(-85, 8, -80, 10), mask=shapely.Point(-105, 55)
)
@pytest.mark.skipif(
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
)
@pytest.mark.parametrize(
"mask,expected",
[
("POINT (-105 55)", [3]),
("POLYGON ((-80 8, -80 10, -85 10, -85 8, -80 8))", [33, 34]),
(
"""POLYGON ((
6.101929 50.97085,
5.773002 50.906611,
5.593156 50.642649,
6.059271 50.686052,
6.374064 50.851481,
6.101929 50.97085
))""",
[121, 129, 130],
),
(
"""GEOMETRYCOLLECTION (
POINT (-7.7 53),
POLYGON ((-80 8, -80 10, -85 10, -85 8, -80 8))
)""",
[33, 34, 133],
),
],
)
def test_read_bounds_mask(naturalearth_lowres_all_ext, mask, expected):
mask = shapely.from_wkt(mask)
fids = read_bounds(naturalearth_lowres_all_ext, mask=mask)[0]
if naturalearth_lowres_all_ext.suffix == ".gpkg":
# fid in gpkg is 1-based
assert array_equal(fids, np.array(expected) + 1)
else:
# fid in other formats is 0-based
assert array_equal(fids, expected)
@pytest.mark.skipif(
__gdal_version__ < (3, 4, 0),
reason="Cannot determine if GEOS is present or absent for GDAL < 3.4",
)
def test_read_bounds_bbox_intersects_vs_envelope_overlaps(naturalearth_lowres_all_ext):
# If GEOS is present and used by GDAL, bbox filter will be based on intersection
# of bbox and actual geometries; if GEOS is absent or not used by GDAL, it
# will be based on overlap of bounding boxes instead
fids, _ = read_bounds(naturalearth_lowres_all_ext, bbox=(-140, 20, -100, 45))
if __gdal_geos_version__ is None:
# bboxes for CAN, RUS overlap but do not intersect geometries
assert fids.shape == (4,)
if naturalearth_lowres_all_ext.suffix == ".gpkg":
# fid in gpkg is 1-based
assert array_equal(fids, [4, 5, 19, 28]) # CAN, USA, RUS, MEX
else:
# fid in other formats is 0-based
assert array_equal(fids, [3, 4, 18, 27]) # CAN, USA, RUS, MEX
else:
assert fids.shape == (2,)
if naturalearth_lowres_all_ext.suffix == ".gpkg":
# fid in gpkg is 1-based
assert array_equal(fids, [5, 28]) # USA, MEX
else:
# fid in other formats is 0-based
assert array_equal(fids, [4, 27]) # USA, MEX
def test_read_info(naturalearth_lowres):
meta = read_info(naturalearth_lowres)
assert meta["crs"] == "EPSG:4326"
assert meta["geometry_type"] == "Polygon"
assert meta["encoding"] == "UTF-8"
assert meta["fields"].shape == (5,)
assert meta["dtypes"].tolist() == ["int64", "object", "object", "object", "float64"]
assert meta["features"] == 177
assert allclose(meta["total_bounds"], (-180, -90, 180, 83.64513))
assert meta["driver"] == "ESRI Shapefile"
assert meta["capabilities"]["random_read"] is True
assert meta["capabilities"]["fast_set_next_by_index"] is True
assert meta["capabilities"]["fast_spatial_filter"] is False
assert meta["capabilities"]["fast_feature_count"] is True
assert meta["capabilities"]["fast_total_bounds"] is True
@pytest.mark.parametrize(
"dataset_kwargs,fields",
[
({}, ["top_level", "intermediate_level"]),
(
{"FLATTEN_NESTED_ATTRIBUTES": "YES"},
[
"top_level",
"intermediate_level_bottom_level",
],
),
(
{"flatten_nested_attributes": "yes"},
[
"top_level",
"intermediate_level_bottom_level",
],
),
(
{"flatten_nested_attributes": True},
[
"top_level",
"intermediate_level_bottom_level",
],
),
],
)
def test_read_info_dataset_kwargs(data_dir, dataset_kwargs, fields):
meta = read_info(data_dir / "test_nested.geojson", **dataset_kwargs)
assert meta["fields"].tolist() == fields
def test_read_info_invalid_dataset_kwargs(naturalearth_lowres):
with pytest.warns(RuntimeWarning, match="does not support open option INVALID"):
read_info(naturalearth_lowres, INVALID="YES")
def test_read_info_force_feature_count_exception(data_dir):
with pytest.raises(DataLayerError, match="Could not iterate over features"):
read_info(data_dir / "sample.osm.pbf", layer="lines", force_feature_count=True)
@pytest.mark.parametrize(
"layer, force, expected",
[
("points", False, -1),
("points", True, 8),
("lines", False, -1),
("lines", True, 36),
],
)
def test_read_info_force_feature_count(data_dir, layer, force, expected):
# the sample OSM file has non-increasing node IDs which causes the default
# custom indexing to raise an exception iterating over features
meta = read_info(
data_dir / "sample.osm.pbf",
layer=layer,
force_feature_count=force,
USE_CUSTOM_INDEXING=False,
)
assert meta["features"] == expected
@pytest.mark.parametrize(
    "force_total_bounds, expected_total_bounds",
    [(True, (-180.0, -90.0, 180.0, 83.64513)), (False, None)],
)
def test_read_info_force_total_bounds(
    tmpdir, naturalearth_lowres, force_total_bounds, expected_total_bounds
):
    # GeoJSON files don't have a fast way to determine total_bounds
    geojson_path = prepare_testfile(naturalearth_lowres, dst_dir=tmpdir, ext=".geojson")
    info = read_info(geojson_path, force_total_bounds=force_total_bounds)
    if expected_total_bounds is not None:
        assert allclose(info["total_bounds"], expected_total_bounds)
    else:
        assert info["total_bounds"] is None
def test_read_info_without_geometry(test_fgdb_vsi):
assert read_info(test_fgdb_vsi)["total_bounds"] is None
@pytest.mark.parametrize(
"name,value,expected",
[
("CPL_DEBUG", "ON", True),
("CPL_DEBUG", True, True),
("CPL_DEBUG", "OFF", False),
("CPL_DEBUG", False, False),
],
)
def test_set_config_options(name, value, expected):
set_gdal_config_options({name: value})
actual = get_gdal_config_option(name)
assert actual == expected
def test_reset_config_options():
set_gdal_config_options({"foo": "bar"})
assert get_gdal_config_option("foo") == "bar"
set_gdal_config_options({"foo": None})
assert get_gdal_config_option("foo") is None
def test_error_handling(capfd):
# an operation that triggers a GDAL Failure
# -> error translated into Python exception + not printed to stderr
with pytest.raises(DataSourceError, match="No such file or directory"):
read_info("non-existent.shp")
assert capfd.readouterr().err == ""
def test_error_handling_warning(capfd, naturalearth_lowres):
# an operation that triggers a GDAL Warning
# -> translated into a Python warning + not printed to stderr
with pytest.warns(RuntimeWarning, match="does not support open option INVALID"):
read_info(naturalearth_lowres, INVALID="YES")
assert capfd.readouterr().err == ""

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,332 @@
import os
import contextlib
from zipfile import ZipFile, ZIP_DEFLATED
import pytest
import pyogrio
import pyogrio.raw
from pyogrio.util import vsi_path
try:
import geopandas # NOQA
has_geopandas = True
except ImportError:
has_geopandas = False
@contextlib.contextmanager
def change_cwd(path):
    """Temporarily change the working directory to *path*, restoring on exit."""
    previous_dir = os.getcwd()
    os.chdir(str(path))
    try:
        yield
    finally:
        # always return to where we started, even if the body raised
        os.chdir(previous_dir)
@pytest.mark.parametrize(
"path, expected",
[
# local file paths that should be passed through as is
("data.gpkg", "data.gpkg"),
("/home/user/data.gpkg", "/home/user/data.gpkg"),
(r"C:\User\Documents\data.gpkg", r"C:\User\Documents\data.gpkg"),
("file:///home/user/data.gpkg", "/home/user/data.gpkg"),
# cloud URIs
("https://testing/data.gpkg", "/vsicurl/https://testing/data.gpkg"),
("s3://testing/data.gpkg", "/vsis3/testing/data.gpkg"),
("gs://testing/data.gpkg", "/vsigs/testing/data.gpkg"),
("az://testing/data.gpkg", "/vsiaz/testing/data.gpkg"),
("adl://testing/data.gpkg", "/vsiadls/testing/data.gpkg"),
("adls://testing/data.gpkg", "/vsiadls/testing/data.gpkg"),
("hdfs://testing/data.gpkg", "/vsihdfs/testing/data.gpkg"),
("webhdfs://testing/data.gpkg", "/vsiwebhdfs/testing/data.gpkg"),
# archives
("zip://data.zip", "/vsizip/data.zip"),
("tar://data.tar", "/vsitar/data.tar"),
("gzip://data.gz", "/vsigzip/data.gz"),
("tar://./my.tar!my.geojson", "/vsitar/./my.tar/my.geojson"),
(
"zip://home/data/shapefile.zip!layer.shp",
"/vsizip/home/data/shapefile.zip/layer.shp",
),
# combined schemes
("zip+s3://testing/shapefile.zip", "/vsizip/vsis3/testing/shapefile.zip"),
(
"zip+https://s3.amazonaws.com/testing/shapefile.zip",
"/vsizip/vsicurl/https://s3.amazonaws.com/testing/shapefile.zip",
),
# auto-prefix zip files
("test.zip", "/vsizip/test.zip"),
("/a/b/test.zip", "/vsizip//a/b/test.zip"),
("a/b/test.zip", "/vsizip/a/b/test.zip"),
# archives using ! notation should be prefixed by vsizip
("test.zip!item.shp", "/vsizip/test.zip/item.shp"),
("test.zip!/a/b/item.shp", "/vsizip/test.zip/a/b/item.shp"),
("test.zip!a/b/item.shp", "/vsizip/test.zip/a/b/item.shp"),
("/vsizip/test.zip/a/b/item.shp", "/vsizip/test.zip/a/b/item.shp"),
("zip:///test.zip/a/b/item.shp", "/vsizip//test.zip/a/b/item.shp"),
# auto-prefix remote zip files
(
"https://s3.amazonaws.com/testing/test.zip",
"/vsizip/vsicurl/https://s3.amazonaws.com/testing/test.zip",
),
(
"https://s3.amazonaws.com/testing/test.zip!/a/b/item.shp",
"/vsizip/vsicurl/https://s3.amazonaws.com/testing/test.zip/a/b/item.shp",
),
("s3://testing/test.zip", "/vsizip/vsis3/testing/test.zip"),
(
"s3://testing/test.zip!a/b/item.shp",
"/vsizip/vsis3/testing/test.zip/a/b/item.shp",
),
],
)
def test_vsi_path(path, expected):
assert vsi_path(path) == expected
def test_vsi_path_unknown():
# unrecognized URI gets passed through as is
assert vsi_path("s4://test/data.geojson") == "s4://test/data.geojson"
def test_vsi_handling_read_functions(naturalearth_lowres_vsi):
# test that all different read entry points have the path handling
# (a zip:// path would otherwise fail)
path, _ = naturalearth_lowres_vsi
path = "zip://" + str(path)
result = pyogrio.raw.read(path)
assert len(result[2]) == 177
result = pyogrio.read_info(path)
assert result["features"] == 177
result = pyogrio.read_bounds(path)
assert len(result[0]) == 177
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_vsi_handling_read_dataframe(naturalearth_lowres_vsi):
path, _ = naturalearth_lowres_vsi
path = "zip://" + str(path)
result = pyogrio.read_dataframe(path)
assert len(result) == 177
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_path_absolute(data_dir):
    """Absolute paths are accepted both as pathlib.Path and as str."""
    shp = data_dir / "naturalearth_lowres/naturalearth_lowres.shp"
    for candidate in (shp, str(shp)):
        frame = pyogrio.read_dataframe(candidate)
        assert len(frame) == 177
def test_path_relative(data_dir):
    """Relative paths resolve against the current working directory."""
    rel = "naturalearth_lowres/naturalearth_lowres.shp"
    with change_cwd(data_dir):
        geometry = pyogrio.raw.read(rel)[2]
        assert len(geometry) == 177

        info = pyogrio.read_info(rel)
        assert info["features"] == 177

        bounds_result = pyogrio.read_bounds(rel)
        assert len(bounds_result[0]) == 177
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_path_relative_dataframe(data_dir):
    """read_dataframe also resolves relative paths against the cwd."""
    with change_cwd(data_dir):
        frame = pyogrio.read_dataframe(
            "naturalearth_lowres/naturalearth_lowres.shp"
        )
        assert len(frame) == 177
def test_uri_local_file(data_dir):
    """file:// URIs to local files work for all read entry points."""
    shp = data_dir / "naturalearth_lowres/naturalearth_lowres.shp"
    uri = f"file://{shp}"

    assert len(pyogrio.raw.read(uri)[2]) == 177
    assert pyogrio.read_info(uri)["features"] == 177
    assert len(pyogrio.read_bounds(uri)[0]) == 177
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_uri_local_file_dataframe(data_dir):
    """read_dataframe accepts file:// URIs to local files."""
    shp = data_dir / "naturalearth_lowres/naturalearth_lowres.shp"
    assert len(pyogrio.read_dataframe(f"file://{shp}")) == 177
def test_zip_path(naturalearth_lowres_vsi):
    """zip:// and /vsizip/ paths work for raw read, read_info and read_bounds."""

    def check_all_reads(candidate):
        # every read entry point should see all 177 features
        assert len(pyogrio.raw.read(candidate)[2]) == 177
        assert pyogrio.read_info(candidate)["features"] == 177
        assert len(pyogrio.read_bounds(candidate)[0]) == 177

    path, path_vsi = naturalearth_lowres_vsi

    # absolute zip path
    check_all_reads(f"zip://{path}")

    # absolute vsizip path
    check_all_reads(path_vsi)

    # relative zip path
    with change_cwd(path.parent):
        check_all_reads(f"zip://{path.name}")
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_zip_path_dataframe(naturalearth_lowres_vsi):
    """read_dataframe works with zip:// and /vsizip/ paths, absolute or relative."""
    path, path_vsi = naturalearth_lowres_vsi

    # absolute zip path
    assert len(pyogrio.read_dataframe(f"zip://{path}")) == 177

    # absolute vsizip path
    assert len(pyogrio.read_dataframe(path_vsi)) == 177

    # relative zip path
    with change_cwd(path.parent):
        assert len(pyogrio.read_dataframe(f"zip://{path.name}")) == 177
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_detect_zip_path(tmp_path, naturalearth_lowres):
    """Zip archives are auto-detected and members are addressable with "!".

    Builds a zip containing two shapefiles -- test1.shp at the archive root
    and test2.shp under the a/b/ subdirectory -- then verifies the supported
    ways of addressing archive members (bare path, "!" specifier, zip://
    scheme, and an explicit /vsizip/ path).
    """
    # create a zipfile with 2 shapefiles in a set of subdirectories
    df = pyogrio.read_dataframe(naturalearth_lowres, where="iso_a3 in ('CAN', 'PER')")
    pyogrio.write_dataframe(df.loc[df.iso_a3 == "CAN"], tmp_path / "test1.shp")
    pyogrio.write_dataframe(df.loc[df.iso_a3 == "PER"], tmp_path / "test2.shp")
    path = tmp_path / "test.zip"
    with ZipFile(path, mode="w", compression=ZIP_DEFLATED, compresslevel=5) as out:
        for ext in ["dbf", "prj", "shp", "shx"]:
            filename = f"test1.{ext}"
            out.write(tmp_path / filename, filename)
            filename = f"test2.{ext}"
            # BUG FIX: the archive name must include the member filename so
            # that the test2.* sidecar files land under a/b/ inside the zip
            # (the /a/b/test2.shp reads below depend on that layout)
            out.write(tmp_path / filename, f"/a/b/{filename}")
    # defaults to the first shapefile found, at lowest subdirectory
    df = pyogrio.read_dataframe(path)
    assert df.iso_a3[0] == "CAN"
    # selecting a shapefile from within the zip requires the "!" archive specifier
    df = pyogrio.read_dataframe(f"{path}!test1.shp")
    assert df.iso_a3[0] == "CAN"
    df = pyogrio.read_dataframe(f"{path}!/a/b/test2.shp")
    assert df.iso_a3[0] == "PER"
    # specifying zip:// scheme should also work
    df = pyogrio.read_dataframe(f"zip://{path}!/a/b/test2.shp")
    assert df.iso_a3[0] == "PER"
    # specifying /vsizip/ should also work but path must already be in GDAL ready
    # format without the "!" archive specifier
    df = pyogrio.read_dataframe(f"/vsizip/{path}/a/b/test2.shp")
    assert df.iso_a3[0] == "PER"
@pytest.mark.network
def test_url():
    """Plain https:// URLs are readable by all read entry points."""
    url = "https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp"  # NOQA
    assert len(pyogrio.raw.read(url)[2]) == 177
    assert pyogrio.read_info(url)["features"] == 177
    assert len(pyogrio.read_bounds(url)[0]) == 177
@pytest.mark.network
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_url_dataframe():
    """read_dataframe reads directly from a remote https:// URL.

    CONSISTENCY FIX: this test fetches a remote file, so it carries the
    ``network`` mark like every other network-hitting test in this file
    (test_url, test_url_with_zip_dataframe, test_uri_s3_dataframe).
    """
    url = "https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp"  # NOQA
    assert len(pyogrio.read_dataframe(url)) == 177
@pytest.mark.network
def test_url_with_zip():
    """zip+https:// URLs combine the zip and curl VSI handlers."""
    url = "zip+https://s3.amazonaws.com/fiona-testing/coutwildrnp.zip"

    geometry = pyogrio.raw.read(url)[2]
    assert len(geometry) == 67

    info = pyogrio.read_info(url)
    assert info["features"] == 67

    bounds_result = pyogrio.read_bounds(url)
    assert len(bounds_result[0]) == 67
@pytest.mark.network
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_url_with_zip_dataframe():
    """read_dataframe reads a remote zipped dataset via zip+https://."""
    frame = pyogrio.read_dataframe(
        "zip+https://s3.amazonaws.com/fiona-testing/coutwildrnp.zip"
    )
    assert len(frame) == 67
@pytest.fixture
def aws_env_setup(monkeypatch):
    # Allow anonymous access to public S3 buckets so the s3:// tests do not
    # require AWS credentials; monkeypatch restores the env var afterwards.
    monkeypatch.setenv("AWS_NO_SIGN_REQUEST", "YES")
@pytest.mark.network
def test_uri_s3(aws_env_setup):
    """zip+s3:// URIs are readable by all read entry points."""
    uri = "zip+s3://fiona-testing/coutwildrnp.zip"
    assert len(pyogrio.raw.read(uri)[2]) == 67
    assert pyogrio.read_info(uri)["features"] == 67
    assert len(pyogrio.read_bounds(uri)[0]) == 67
@pytest.mark.network
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_uri_s3_dataframe(aws_env_setup):
    """read_dataframe reads a zipped dataset directly from S3."""
    frame = pyogrio.read_dataframe("zip+s3://fiona-testing/coutwildrnp.zip")
    assert len(frame) == 67

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,86 @@
"""Run pytest tests manually on Windows due to import errors
"""
from pathlib import Path
import platform
from tempfile import TemporaryDirectory
# Resolve the fixtures directory relative to this script.
data_dir = Path(__file__).parent.resolve() / "fixtures"

if platform.system() == "Windows":
    naturalearth_lowres = data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp")
    test_fgdb_vsi = f"/vsizip/{data_dir}/test_fgdb.gdb.zip"

    def _attempt(test_func, *args):
        # Run a single test function; print (rather than raise) any failure
        # so the remaining tests still execute.
        try:
            test_func(*args)
        except Exception as ex:
            print(ex)

    from pyogrio.tests.test_core import test_read_info

    _attempt(test_read_info, naturalearth_lowres)

    from pyogrio.tests.test_raw_io import (
        test_read,
        test_read_no_geometry,
        test_read_columns,
        test_read_skip_features,
        test_read_max_features,
        test_read_where,
        test_read_where_invalid,
        test_write,
        test_write_gpkg,
        test_write_geojson,
    )

    # read tests all take the shapefile path as their only argument
    for read_test in (
        test_read,
        test_read_no_geometry,
        test_read_columns,
        test_read_skip_features,
        test_read_max_features,
        test_read_where,
        test_read_where_invalid,
    ):
        _attempt(read_test, naturalearth_lowres)

    # write tests each get a fresh temporary output directory
    for write_test in (test_write, test_write_gpkg, test_write_geojson):
        with TemporaryDirectory() as tmpdir:
            _attempt(write_test, tmpdir, naturalearth_lowres)