2025-01-26 19:24:23 -08:00
parent 32cd60e92b
commit d1dde0dbc6
4155 changed files with 29170 additions and 216373 deletions


@@ -2,7 +2,6 @@ from packaging.version import Version
import pyarrow
_ERROR_MSG = """\
Disallowed deserialization of 'arrow.py_extension_type':
storage_type = {storage_type}


@@ -1,19 +1,31 @@
from packaging.version import Version
import json
import warnings
from packaging.version import Version
import numpy as np
from pandas import DataFrame, Series
import geopandas._compat as compat
from geopandas._compat import import_optional_dependency
from geopandas.array import from_wkb
from geopandas import GeoDataFrame
import shapely
import geopandas
from geopandas import GeoDataFrame
from geopandas._compat import import_optional_dependency
from geopandas.array import from_shapely, from_wkb
from .file import _expand_user
METADATA_VERSION = "1.0.0"
SUPPORTED_VERSIONS = ["0.1.0", "0.4.0", "1.0.0-beta.1", "1.0.0"]
SUPPORTED_VERSIONS = ["0.1.0", "0.4.0", "1.0.0-beta.1", "1.0.0", "1.1.0"]
GEOARROW_ENCODINGS = [
"point",
"linestring",
"polygon",
"multipoint",
"multilinestring",
"multipolygon",
]
SUPPORTED_ENCODINGS = ["WKB"] + GEOARROW_ENCODINGS
# reference: https://github.com/opengeospatial/geoparquet
# Metadata structure:
@@ -68,7 +80,40 @@ def _remove_id_from_member_of_ensembles(json_dict):
member.pop("id", None)
def _create_metadata(df, schema_version=None):
# type ids 0 to 7
_geometry_type_names = [
"Point",
"LineString",
"LineString",
"Polygon",
"MultiPoint",
"MultiLineString",
"MultiPolygon",
"GeometryCollection",
]
_geometry_type_names += [geom_type + " Z" for geom_type in _geometry_type_names]
def _get_geometry_types(series):
"""
Get unique geometry types from a GeoSeries.
"""
arr_geometry_types = shapely.get_type_id(series.array._data)
# ensure to include "... Z" for 3D geometries
has_z = shapely.has_z(series.array._data)
arr_geometry_types[has_z] += 8
geometry_types = Series(arr_geometry_types).unique().tolist()
# drop missing values (shapely.get_type_id returns -1 for those)
if -1 in geometry_types:
geometry_types.remove(-1)
return sorted([_geometry_type_names[idx] for idx in geometry_types])
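
A minimal sketch of the type-id arithmetic above, assuming shapely 2.x (get_type_id() yields ids 0-7, and the +8 offset for 3D geometries lands in the "... Z" half of _geometry_type_names):

import numpy as np
import shapely

geoms = np.array(
    [shapely.Point(0, 0), shapely.Point(0, 0, 1), shapely.LineString([(0, 0), (1, 1)])]
)
ids = shapely.get_type_id(geoms)  # array([0, 0, 1]): Point, Point, LineString
ids[shapely.has_z(geoms)] += 8    # the 3D Point becomes 8, i.e. "Point Z"
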
def _create_metadata(
df, schema_version=None, geometry_encoding=None, write_covering_bbox=False
):
"""Create and encode geo metadata dict.
Parameters
@@ -77,13 +122,22 @@ def _create_metadata(df, schema_version=None):
schema_version : {'0.1.0', '0.4.0', '1.0.0-beta.1', '1.0.0', None}
GeoParquet specification version; if not provided will default to
latest supported version.
write_covering_bbox : bool, default False
Writes the bounding box column for each row entry with column
name 'bbox'. Writing a bbox column can be computationally
expensive, hence the default setting is False.
Returns
-------
dict
"""
schema_version = schema_version or METADATA_VERSION
if schema_version is None:
if geometry_encoding and any(
encoding != "WKB" for encoding in geometry_encoding.values()
):
schema_version = "1.1.0"
else:
schema_version = METADATA_VERSION
if schema_version not in SUPPORTED_VERSIONS:
raise ValueError(
@@ -94,7 +148,8 @@ def _create_metadata(df, schema_version=None):
column_metadata = {}
for col in df.columns[df.dtypes == "geometry"]:
series = df[col]
geometry_types = sorted(Series(series.geom_type.unique()).dropna())
geometry_types = _get_geometry_types(series)
if schema_version[0] == "0":
geometry_types_name = "geometry_type"
if len(geometry_types) == 1:
@@ -111,7 +166,7 @@ def _create_metadata(df, schema_version=None):
_remove_id_from_member_of_ensembles(crs)
column_metadata[col] = {
"encoding": "WKB",
"encoding": geometry_encoding[col],
"crs": crs,
geometry_types_name: geometry_types,
}
@@ -121,10 +176,20 @@ def _create_metadata(df, schema_version=None):
# don't add bbox with NaNs for empty / all-NA geometry column
column_metadata[col]["bbox"] = bbox
if write_covering_bbox:
column_metadata[col]["covering"] = {
"bbox": {
"xmin": ["bbox", "xmin"],
"ymin": ["bbox", "ymin"],
"xmax": ["bbox", "xmax"],
"ymax": ["bbox", "ymax"],
},
}
return {
"primary_column": df._geometry_column_name,
"columns": column_metadata,
"version": schema_version or METADATA_VERSION,
"version": schema_version,
"creator": {"library": "geopandas", "version": geopandas.__version__},
}
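
For illustration, a sketch of the dict this returns for one WKB-encoded column written with write_covering_bbox=True (bbox values and versions are illustrative; 'crs' holds a PROJJSON dict in practice, and schema versions 0.x use the key 'geometry_type' instead of 'geometry_types'):

{
    "primary_column": "geometry",
    "columns": {
        "geometry": {
            "encoding": "WKB",
            "crs": None,  # None is interpreted as the default OGC:CRS84
            "geometry_types": ["Point"],
            "bbox": [0.0, 0.0, 1.0, 1.0],
            "covering": {
                "bbox": {
                    "xmin": ["bbox", "xmin"],
                    "ymin": ["bbox", "ymin"],
                    "xmax": ["bbox", "xmax"],
                    "ymax": ["bbox", "ymax"],
                }
            },
        }
    },
    "version": "1.1.0",
    "creator": {"library": "geopandas", "version": "1.0.0"},
}
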
@@ -188,7 +253,7 @@ def _validate_dataframe(df):
raise ValueError("Index level names must be strings")
def _validate_metadata(metadata):
def _validate_geo_metadata(metadata):
"""Validate geo metadata.
Must not be empty, and must contain the structure specified above.
@@ -232,8 +297,12 @@ def _validate_metadata(metadata):
"'{key}' for column '{col}'".format(key=key, col=col)
)
if column_metadata["encoding"] != "WKB":
raise ValueError("Only WKB geometry encoding is supported")
if column_metadata["encoding"] not in SUPPORTED_ENCODINGS:
raise ValueError(
"Only WKB geometry encoding or one of the native encodings "
f"({GEOARROW_ENCODINGS!r}) are supported, "
f"got: {column_metadata['encoding']}"
)
if column_metadata.get("edges", "planar") == "spherical":
warnings.warn(
@@ -245,37 +314,59 @@ def _validate_metadata(metadata):
stacklevel=4,
)
if "covering" in column_metadata:
covering = column_metadata["covering"]
if "bbox" in covering:
bbox = covering["bbox"]
for var in ["xmin", "ymin", "xmax", "ymax"]:
if var not in bbox.keys():
raise ValueError("Metadata for bbox column is malformed.")
def _geopandas_to_arrow(df, index=None, schema_version=None):
def _geopandas_to_arrow(
df,
index=None,
geometry_encoding="WKB",
schema_version=None,
write_covering_bbox=None,
):
"""
Helper function with main, shared logic for to_parquet/to_feather.
"""
from pyarrow import Table
from pyarrow import StructArray
from geopandas.io._geoarrow import geopandas_to_arrow
_validate_dataframe(df)
# create geo metadata before altering incoming data frame
geo_metadata = _create_metadata(df, schema_version=schema_version)
if schema_version is not None:
if geometry_encoding != "WKB" and schema_version != "1.1.0":
raise ValueError(
"'geoarrow' encoding is only supported with schema version >= 1.1.0"
)
kwargs = {}
if compat.USE_SHAPELY_20:
kwargs = {"flavor": "iso"}
else:
for col in df.columns[df.dtypes == "geometry"]:
series = df[col]
if series.has_z.any():
warnings.warn(
"The GeoDataFrame contains 3D geometries, and when using "
"shapely < 2.0, such geometries will be written not exactly "
"following to the GeoParquet spec (not using ISO WKB). For "
"most use cases this should not be a problem (GeoPandas can "
"read such files fine).",
stacklevel=2,
)
break
df = df.to_wkb(**kwargs)
table, geometry_encoding_dict = geopandas_to_arrow(
df, geometry_encoding=geometry_encoding, index=index, interleaved=False
)
geo_metadata = _create_metadata(
df,
schema_version=schema_version,
geometry_encoding=geometry_encoding_dict,
write_covering_bbox=write_covering_bbox,
)
table = Table.from_pandas(df, preserve_index=index)
if write_covering_bbox:
if "bbox" in df.columns:
raise ValueError(
"An existing column 'bbox' already exists in the dataframe. "
"Please rename to write covering bbox."
)
bounds = df.bounds
bbox_array = StructArray.from_arrays(
[bounds["minx"], bounds["miny"], bounds["maxx"], bounds["maxy"]],
names=["xmin", "ymin", "xmax", "ymax"],
)
table = table.append_column("bbox", bbox_array)
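
A self-contained sketch of the struct column built above (literal values stand in for the df.bounds columns that geopandas passes):

import pyarrow as pa

bbox_array = pa.StructArray.from_arrays(
    [pa.array([0.0]), pa.array([0.0]), pa.array([1.0]), pa.array([1.0])],
    names=["xmin", "ymin", "xmax", "ymax"],
)
# one struct per row: {'xmin': 0.0, 'ymin': 0.0, 'xmax': 1.0, 'ymax': 1.0}
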
# Store geopandas specific file-level metadata
# This must be done AFTER creating the table or it is not persisted
@@ -286,7 +377,14 @@ def _geopandas_to_arrow(df, index=None, schema_version=None):
def _to_parquet(
df, path, index=None, compression="snappy", schema_version=None, **kwargs
df,
path,
index=None,
compression="snappy",
geometry_encoding="WKB",
schema_version=None,
write_covering_bbox=False,
**kwargs,
):
"""
Write a GeoDataFrame to the Parquet format.
@@ -312,9 +410,17 @@ def _to_parquet(
output except `RangeIndex` which is stored as metadata only.
compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
Name of the compression to use. Use ``None`` for no compression.
geometry_encoding : {'WKB', 'geoarrow'}, default 'WKB'
The encoding to use for the geometry columns. Defaults to "WKB"
for maximum interoperability. Specify "geoarrow" to use one of the
native GeoArrow-based single-geometry type encodings.
schema_version : {'0.1.0', '0.4.0', '1.0.0', None}
GeoParquet specification version; if not provided will default to
latest supported version.
write_covering_bbox : bool, default False
Writes the bounding box column for each row entry with column
name 'bbox'. Writing a bbox column can be computationally
expensive, hence the default setting is False.
**kwargs
Additional keyword arguments passed to pyarrow.parquet.write_table().
"""
@@ -322,19 +428,14 @@ def _to_parquet(
"pyarrow.parquet", extra="pyarrow is required for Parquet support."
)
if kwargs and "version" in kwargs and kwargs["version"] is not None:
if schema_version is None and kwargs["version"] in SUPPORTED_VERSIONS:
warnings.warn(
"the `version` parameter has been replaced with `schema_version`. "
"`version` will instead be passed directly to the underlying "
"parquet writer unless `version` is 0.1.0 or 0.4.0.",
FutureWarning,
stacklevel=2,
)
schema_version = kwargs.pop("version")
path = _expand_user(path)
table = _geopandas_to_arrow(df, index=index, schema_version=schema_version)
table = _geopandas_to_arrow(
df,
index=index,
geometry_encoding=geometry_encoding,
schema_version=schema_version,
write_covering_bbox=write_covering_bbox,
)
parquet.write_table(table, path, compression=compression, **kwargs)
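
Writer-side usage sketch of the two new keywords (file name hypothetical, gdf an arbitrary GeoDataFrame):

gdf.to_parquet("cities.parquet", geometry_encoding="geoarrow", write_covering_bbox=True)
# -> GeoParquet 1.1.0 metadata, native GeoArrow geometry encoding, and a
#    'bbox' struct column that readers can use for predicate pushdown
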
@@ -379,47 +480,26 @@ def _to_feather(df, path, index=None, compression=None, schema_version=None, **k
if Version(pyarrow.__version__) < Version("0.17.0"):
raise ImportError("pyarrow >= 0.17 required for Feather support")
if kwargs and "version" in kwargs and kwargs["version"] is not None:
if schema_version is None and kwargs["version"] in SUPPORTED_VERSIONS:
warnings.warn(
"the `version` parameter has been replaced with `schema_version`. "
"`version` will instead be passed directly to the underlying "
"feather writer unless `version` is 0.1.0 or 0.4.0.",
FutureWarning,
stacklevel=2,
)
schema_version = kwargs.pop("version")
path = _expand_user(path)
table = _geopandas_to_arrow(df, index=index, schema_version=schema_version)
feather.write_feather(table, path, compression=compression, **kwargs)
def _arrow_to_geopandas(table, metadata=None):
def _arrow_to_geopandas(table, geo_metadata=None):
"""
Helper function with main, shared logic for read_parquet/read_feather.
"""
df = table.to_pandas()
metadata = metadata or table.schema.metadata
if metadata is None or b"geo" not in metadata:
raise ValueError(
"""Missing geo metadata in Parquet/Feather file.
Use pandas.read_parquet/read_feather() instead."""
)
try:
metadata = _decode_metadata(metadata.get(b"geo", b""))
except (TypeError, json.decoder.JSONDecodeError):
raise ValueError("Missing or malformed geo metadata in Parquet/Feather file")
_validate_metadata(metadata)
if geo_metadata is None:
# Note: this path of not passing metadata is also used by dask-geopandas
geo_metadata = _validate_and_decode_metadata(table.schema.metadata)
# Find all geometry columns that were read from the file. May
# be a subset if 'columns' parameter is used.
geometry_columns = df.columns.intersection(metadata["columns"])
geometry_columns = [
col for col in geo_metadata["columns"] if col in table.column_names
]
result_column_names = list(table.slice(0, 0).to_pandas().columns)
geometry_columns.sort(key=result_column_names.index)
if not len(geometry_columns):
raise ValueError(
@@ -428,7 +508,7 @@ def _arrow_to_geopandas(table, metadata=None):
use pandas.read_parquet/read_feather() instead."""
)
geometry = metadata["primary_column"]
geometry = geo_metadata["primary_column"]
# Missing geometry likely indicates a subset of columns was read;
# promote the first available geometry to the primary geometry.
@@ -443,9 +523,12 @@ def _arrow_to_geopandas(table, metadata=None):
stacklevel=3,
)
table_attr = table.drop(geometry_columns)
df = table_attr.to_pandas()
# Convert the WKB columns that are present back to geometry.
for col in geometry_columns:
col_metadata = metadata["columns"][col]
col_metadata = geo_metadata["columns"][col]
if "crs" in col_metadata:
crs = col_metadata["crs"]
if isinstance(crs, dict):
@@ -455,7 +538,19 @@ def _arrow_to_geopandas(table, metadata=None):
# OGC:CRS84
crs = "OGC:CRS84"
df[col] = from_wkb(df[col].values, crs=crs)
if col_metadata["encoding"] == "WKB":
geom_arr = from_wkb(np.array(table[col]), crs=crs)
else:
from geopandas.io._geoarrow import construct_shapely_array
geom_arr = from_shapely(
construct_shapely_array(
table[col].combine_chunks(), "geoarrow." + col_metadata["encoding"]
),
crs=crs,
)
df.insert(result_column_names.index(col), col, geom_arr)
return GeoDataFrame(df, geometry=geometry)
@@ -521,7 +616,59 @@ def _ensure_arrow_fs(filesystem):
return filesystem
def _read_parquet(path, columns=None, storage_options=None, **kwargs):
def _validate_and_decode_metadata(metadata):
if metadata is None or b"geo" not in metadata:
raise ValueError(
"""Missing geo metadata in Parquet/Feather file.
Use pandas.read_parquet/read_feather() instead."""
)
# check for malformed metadata
try:
decoded_geo_metadata = _decode_metadata(metadata.get(b"geo", b""))
except (TypeError, json.decoder.JSONDecodeError):
raise ValueError("Missing or malformed geo metadata in Parquet/Feather file")
_validate_geo_metadata(decoded_geo_metadata)
return decoded_geo_metadata
def _read_parquet_schema_and_metadata(path, filesystem):
"""
Opening the Parquet file/dataset a first time to get the schema and metadata.
TODO: we should look into how we can reuse opened dataset for reading the
actual data, to avoid discovering the dataset twice (problem right now is
that the ParquetDataset interface doesn't allow passing the filters on read)
"""
import pyarrow
from pyarrow import parquet
kwargs = {}
if Version(pyarrow.__version__) < Version("15.0.0"):
kwargs = dict(use_legacy_dataset=False)
try:
schema = parquet.ParquetDataset(path, filesystem=filesystem, **kwargs).schema
except Exception:
schema = parquet.read_schema(path, filesystem=filesystem)
metadata = schema.metadata
# read metadata separately to get the raw Parquet FileMetaData metadata
# (pyarrow doesn't properly expose those in schema.metadata for files
# created by GDAL - https://issues.apache.org/jira/browse/ARROW-16688)
if metadata is None or b"geo" not in metadata:
try:
metadata = parquet.read_metadata(path, filesystem=filesystem).metadata
except Exception:
pass
return schema, metadata
def _read_parquet(path, columns=None, storage_options=None, bbox=None, **kwargs):
"""
Load a Parquet object from the file path, returning a GeoDataFrame.
@@ -565,8 +712,13 @@ def _read_parquet(path, columns=None, storage_options=None, **kwargs):
both ``pyarrow.fs`` and ``fsspec`` (e.g. "s3://") then the ``pyarrow.fs``
filesystem is preferred. Provide the instantiated fsspec filesystem using
the ``filesystem`` keyword if you wish to use its implementation.
bbox : tuple, optional
Bounding box to be used to filter selection from geoparquet data. This
is only usable if the data was saved with the bbox covering metadata,
or if the primary geometry column uses the 'point' encoding.
Input is of the tuple format (xmin, ymin, xmax, ymax).
**kwargs
Any additional kwargs passed to pyarrow.parquet.read_table().
Any additional kwargs passed to :func:`pyarrow.parquet.read_table`.
Returns
-------
@@ -595,29 +747,36 @@ def _read_parquet(path, columns=None, storage_options=None, **kwargs):
filesystem, path = _get_filesystem_path(
path, filesystem=filesystem, storage_options=storage_options
)
path = _expand_user(path)
schema, metadata = _read_parquet_schema_and_metadata(path, filesystem)
geo_metadata = _validate_and_decode_metadata(metadata)
bbox_filter = (
_get_parquet_bbox_filter(geo_metadata, bbox) if bbox is not None else None
)
if_bbox_column_exists = _check_if_covering_in_geo_metadata(geo_metadata)
# by default, bbox column is not read in, so must specify which
# columns are read in if it exists.
if not columns and if_bbox_column_exists:
columns = _get_non_bbox_columns(schema, geo_metadata)
# if both bbox and filters kwargs are used, must splice together.
if "filters" in kwargs:
filters_kwarg = kwargs.pop("filters")
filters = _splice_bbox_and_filters(filters_kwarg, bbox_filter)
else:
filters = bbox_filter
kwargs["use_pandas_metadata"] = True
table = parquet.read_table(path, columns=columns, filesystem=filesystem, **kwargs)
# read metadata separately to get the raw Parquet FileMetaData metadata
# (pyarrow doesn't properly expose those in schema.metadata for files
# created by GDAL - https://issues.apache.org/jira/browse/ARROW-16688)
metadata = None
if table.schema.metadata is None or b"geo" not in table.schema.metadata:
try:
# read_metadata does not accept a filesystem keyword, so need to
# handle this manually (https://issues.apache.org/jira/browse/ARROW-16719)
if filesystem is not None:
pa_filesystem = _ensure_arrow_fs(filesystem)
with pa_filesystem.open_input_file(path) as source:
metadata = parquet.read_metadata(source).metadata
else:
metadata = parquet.read_metadata(path).metadata
except Exception:
pass
table = parquet.read_table(
path, columns=columns, filesystem=filesystem, filters=filters, **kwargs
)
return _arrow_to_geopandas(table, metadata)
return _arrow_to_geopandas(table, geo_metadata)
def _read_feather(path, columns=None, **kwargs):
@@ -677,11 +836,78 @@ def _read_feather(path, columns=None, **kwargs):
)
# TODO move this into `import_optional_dependency`
import pyarrow
import geopandas.io._pyarrow_hotfix # noqa: F401
if Version(pyarrow.__version__) < Version("0.17.0"):
raise ImportError("pyarrow >= 0.17 required for Feather support")
path = _expand_user(path)
table = feather.read_table(path, columns=columns, **kwargs)
return _arrow_to_geopandas(table)
def _get_parquet_bbox_filter(geo_metadata, bbox):
primary_column = geo_metadata["primary_column"]
if _check_if_covering_in_geo_metadata(geo_metadata):
bbox_column_name = _get_bbox_encoding_column_name(geo_metadata)
return _convert_bbox_to_parquet_filter(bbox, bbox_column_name)
elif geo_metadata["columns"][primary_column]["encoding"] == "point":
import pyarrow.compute as pc
return (
(pc.field((primary_column, "x")) >= bbox[0])
& (pc.field((primary_column, "x")) <= bbox[2])
& (pc.field((primary_column, "y")) >= bbox[1])
& (pc.field((primary_column, "y")) <= bbox[3])
)
else:
raise ValueError(
"Specifying 'bbox' not supported for this Parquet file (it should either "
"have a bbox covering column or use 'point' encoding)."
)
def _convert_bbox_to_parquet_filter(bbox, bbox_column_name):
import pyarrow.compute as pc
return ~(
(pc.field((bbox_column_name, "xmin")) > bbox[2])
| (pc.field((bbox_column_name, "ymin")) > bbox[3])
| (pc.field((bbox_column_name, "xmax")) < bbox[0])
| (pc.field((bbox_column_name, "ymax")) < bbox[1])
)
def _check_if_covering_in_geo_metadata(geo_metadata):
primary_column = geo_metadata["primary_column"]
return "covering" in geo_metadata["columns"][primary_column].keys()
def _get_bbox_encoding_column_name(geo_metadata):
primary_column = geo_metadata["primary_column"]
return geo_metadata["columns"][primary_column]["covering"]["bbox"]["xmin"][0]
def _get_non_bbox_columns(schema, geo_metadata):
bbox_column_name = _get_bbox_encoding_column_name(geo_metadata)
columns = schema.names
if bbox_column_name in columns:
columns.remove(bbox_column_name)
return columns
def _splice_bbox_and_filters(kwarg_filters, bbox_filter):
parquet = import_optional_dependency(
"pyarrow.parquet", extra="pyarrow is required for Parquet support."
)
if bbox_filter is None:
return kwarg_filters
filters_expression = parquet.filters_to_expression(kwarg_filters)
return bbox_filter & filters_expression
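
Reader-side usage sketch (file and column names hypothetical); the bbox keyword needs a covering bbox column or 'point' encoding, and any additional filters are combined with the bbox expression by _splice_bbox_and_filters:

import geopandas

gdf = geopandas.read_parquet(
    "cities.parquet",
    bbox=(-74.3, 40.5, -73.7, 40.9),        # (xmin, ymin, xmax, ymax)
    filters=[("population", ">", 100_000)],
)
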


@@ -1,30 +1,33 @@
from __future__ import annotations
import os
import urllib.request
import warnings
from io import IOBase
from packaging.version import Version
from pathlib import Path
import warnings
# Adapted from pandas.io.common
from urllib.parse import urlparse as parse_url
from urllib.parse import uses_netloc, uses_params, uses_relative
import numpy as np
import pandas as pd
from pandas.api.types import is_integer_dtype
import pyproj
import shapely
from shapely.geometry import mapping
from shapely.geometry.base import BaseGeometry
from geopandas import GeoDataFrame, GeoSeries
# Adapted from pandas.io.common
from urllib.parse import urlparse as parse_url
from urllib.parse import uses_netloc, uses_params, uses_relative
import urllib.request
from geopandas._compat import HAS_PYPROJ, PANDAS_GE_20
from geopandas.io.util import vsi_path
_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
_VALID_URLS.discard("")
# file:// URIs are supported by fiona/pyogrio -> don't already open + read the file here
_VALID_URLS.discard("file")
fiona = None
fiona_env = None
fiona_import_error = None
@@ -55,6 +58,7 @@ def _import_fiona():
FIONA_GE_19 = Version(Version(fiona.__version__).base_version) >= Version(
"1.9.0"
)
except ImportError as err:
fiona = False
fiona_import_error = str(err)
@@ -71,13 +75,14 @@ def _import_pyogrio():
if pyogrio is None:
try:
import pyogrio
except ImportError as err:
pyogrio = False
pyogrio_import_error = str(err)
def _check_fiona(func):
if fiona is None:
if not fiona:
raise ImportError(
f"the {func} requires the 'fiona' package, but it is not installed or does "
f"not import correctly.\nImporting fiona resulted in: {fiona_import_error}"
@@ -85,7 +90,7 @@ def _check_fiona(func):
def _check_pyogrio(func):
if pyogrio is None:
if not pyogrio:
raise ImportError(
f"the {func} requires the 'pyogrio' package, but it is not installed "
"or does not import correctly."
@@ -93,35 +98,49 @@ def _check_pyogrio(func):
)
def _check_metadata_supported(metadata: str | None, engine: str, driver: str) -> None:
if metadata is None:
return
if driver != "GPKG":
raise NotImplementedError(
"The 'metadata' keyword is only supported for the GPKG driver."
)
if engine == "fiona" and not FIONA_GE_19:
raise NotImplementedError(
"The 'metadata' keyword is only supported for Fiona >= 1.9."
)
def _check_engine(engine, func):
# if not specified through keyword or option, then default to "fiona" if
# installed, otherwise try pyogrio
# if not specified through keyword or option, then default to "pyogrio" if
# installed, otherwise try fiona
if engine is None:
import geopandas
engine = geopandas.options.io_engine
if engine is None:
_import_fiona()
if fiona:
engine = "fiona"
_import_pyogrio()
if pyogrio:
engine = "pyogrio"
else:
_import_pyogrio()
if pyogrio:
engine = "pyogrio"
_import_fiona()
if fiona:
engine = "fiona"
if engine == "fiona":
_import_fiona()
_check_fiona(func)
elif engine == "pyogrio":
if engine == "pyogrio":
_import_pyogrio()
_check_pyogrio(func)
elif engine == "fiona":
_import_fiona()
_check_fiona(func)
elif engine is None:
raise ImportError(
f"The {func} requires the 'pyogrio' or 'fiona' package, "
"but neither is installed or imports correctly."
f"\nImporting fiona resulted in: {fiona_import_error}"
f"\nImporting pyogrio resulted in: {pyogrio_import_error}"
f"\nImporting fiona resulted in: {fiona_import_error}"
)
return engine
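
The resulting engine resolution in a usage sketch (file name hypothetical):

import geopandas

geopandas.options.io_engine = "pyogrio"  # set the global default explicitly
gdf = geopandas.read_file("data.gpkg")   # resolved to pyogrio
gdf = geopandas.read_file("data.gpkg", engine="fiona")  # per-call override
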
@@ -168,31 +187,12 @@ def _is_url(url):
return False
def _is_zip(path):
"""Check if a given path is a zipfile"""
parsed = fiona.path.ParsedPath.from_uri(path)
return (
parsed.archive.endswith(".zip")
if parsed.archive
else parsed.path.endswith(".zip")
)
def _read_file(filename, bbox=None, mask=None, rows=None, engine=None, **kwargs):
def _read_file(
filename, bbox=None, mask=None, columns=None, rows=None, engine=None, **kwargs
):
"""
Returns a GeoDataFrame from a file or URL.
.. note::
GeoPandas currently defaults to use Fiona as the engine in ``read_file``.
However, GeoPandas 1.0 will switch to use pyogrio as the default engine, since
pyogrio can provide a significant speedup compared to Fiona. We recommend to
already install pyogrio and specify the engine by using the ``engine`` keyword
(``geopandas.read_file(..., engine="pyogrio")``), or by setting the default for
the ``engine`` keyword globally with::
geopandas.options.io_engine = "pyogrio"
Parameters
----------
filename : str, path object or file-like object
@@ -209,21 +209,28 @@ def _read_file(filename, bbox=None, mask=None, rows=None, engine=None, **kwargs)
Filter for features that intersect with the given dict-like geojson
geometry, GeoSeries, GeoDataFrame or shapely geometry.
CRS mis-matches are resolved if given a GeoSeries or GeoDataFrame.
Cannot be used with bbox.
Cannot be used with bbox. If multiple geometries are passed, this will
first union all geometries, which may be computationally expensive.
columns : list, optional
List of column names to import from the data source. Column names
must exactly match the names in the data source. To avoid reading
any columns (besides the geometry column), pass an empty list-like.
By default reads all columns.
rows : int or slice, default None
Load in specific rows by passing an integer (first `n` rows) or a
slice() object.
engine : str, "fiona" or "pyogrio"
engine : str, "pyogrio" or "fiona"
The underlying library that is used to read the file. Currently, the
supported options are "fiona" and "pyogrio". Defaults to "fiona" if
installed, otherwise tries "pyogrio".
supported options are "pyogrio" and "fiona". Defaults to "pyogrio" if
installed, otherwise tries "fiona". Engine can also be set globally
with the ``geopandas.options.io_engine`` option.
**kwargs :
Keyword args to be passed to the engine. In case of the "fiona" engine,
the keyword arguments are passed to :func:`fiona.open` or
:class:`fiona.collection.BytesCollection` when opening the file.
For more information on possible keywords, type:
``import fiona; help(fiona.open)``. In case of the "pyogrio" engine,
the keyword arguments are passed to :func:`pyogrio.read_dataframe`.
Keyword args to be passed to the engine, and can be used to access
multi-layer data, data stored within archives (zip files), etc.
In case of the "pyogrio" engine, the keyword arguments are passed to
:func:`pyogrio.read_dataframe`. In case of the "fiona" engine, the keyword
arguments are passed to :func:`fiona.open`. For more information on possible
keywords, type: ``import pyogrio; help(pyogrio.read_dataframe)``.
Examples
@@ -284,7 +291,9 @@ def _read_file(filename, bbox=None, mask=None, rows=None, engine=None, **kwargs)
from_bytes = True
if engine == "pyogrio":
return _read_file_pyogrio(filename, bbox=bbox, mask=mask, rows=rows, **kwargs)
return _read_file_pyogrio(
filename, bbox=bbox, mask=mask, columns=columns, rows=rows, **kwargs
)
elif engine == "fiona":
if pd.api.types.is_file_like(filename):
@@ -295,7 +304,13 @@ def _read_file(filename, bbox=None, mask=None, rows=None, engine=None, **kwargs)
path_or_bytes = filename
return _read_file_fiona(
path_or_bytes, from_bytes, bbox=bbox, mask=mask, rows=rows, **kwargs
path_or_bytes,
from_bytes,
bbox=bbox,
mask=mask,
columns=columns,
rows=rows,
**kwargs,
)
else:
@@ -303,31 +318,36 @@ def _read_file(filename, bbox=None, mask=None, rows=None, engine=None, **kwargs)
def _read_file_fiona(
path_or_bytes, from_bytes, bbox=None, mask=None, rows=None, where=None, **kwargs
path_or_bytes,
from_bytes,
bbox=None,
mask=None,
columns=None,
rows=None,
where=None,
**kwargs,
):
if where is not None and not FIONA_GE_19:
raise NotImplementedError("where requires fiona 1.9+")
if columns is not None:
if "include_fields" in kwargs:
raise ValueError(
"Cannot specify both 'include_fields' and 'columns' keywords"
)
if not FIONA_GE_19:
raise NotImplementedError("'columns' keyword requires fiona 1.9+")
kwargs["include_fields"] = columns
elif "include_fields" in kwargs:
# alias to columns, as this variable is used below to specify column order
# in the dataframe creation
columns = kwargs["include_fields"]
if not from_bytes:
# Opening a file via URL or file-like-object above automatically detects a
# zipped file. In order to match that behavior, attempt to add a zip scheme
# if missing.
if _is_zip(str(path_or_bytes)):
parsed = fiona.parse_path(str(path_or_bytes))
if isinstance(parsed, fiona.path.ParsedPath):
# If fiona is able to parse the path, we can safely look at the scheme
# and update it to have a zip scheme if necessary.
schemes = (parsed.scheme or "").split("+")
if "zip" not in schemes:
parsed.scheme = "+".join(["zip"] + schemes)
path_or_bytes = parsed.name
elif isinstance(parsed, fiona.path.UnparsedPath) and not str(
path_or_bytes
).startswith("/vsi"):
# If fiona is unable to parse the path, it might have a Windows drive
# scheme. Try adding zip:// to the front. If the path starts with "/vsi"
# it is a legacy GDAL path type, so let it pass unmodified.
path_or_bytes = "zip://" + parsed.name
path_or_bytes = vsi_path(str(path_or_bytes))
if from_bytes:
reader = fiona.BytesCollection
@@ -359,7 +379,7 @@ def _read_file_fiona(
assert len(bbox) == 4
# handle loading the mask
elif isinstance(mask, (GeoDataFrame, GeoSeries)):
mask = mapping(mask.to_crs(crs).unary_union)
mask = mapping(mask.to_crs(crs).union_all())
elif isinstance(mask, BaseGeometry):
mask = mapping(mask)
@@ -383,11 +403,14 @@ def _read_file_fiona(
else:
f_filt = features
# get list of columns
columns = list(features.schema["properties"])
columns = columns or list(features.schema["properties"])
datetime_fields = [
k for (k, v) in features.schema["properties"].items() if v == "datetime"
]
if kwargs.get("ignore_geometry", False):
if (
kwargs.get("ignore_geometry", False)
or features.schema["geometry"] == "None"
):
df = pd.DataFrame(
[record["properties"] for record in f_filt], columns=columns
)
@@ -396,16 +419,39 @@ def _read_file_fiona(
f_filt, crs=crs, columns=columns + ["geometry"]
)
for k in datetime_fields:
as_dt = pd.to_datetime(df[k], errors="ignore")
# if to_datetime failed, try again for mixed timezone offsets
if as_dt.dtype == "object":
as_dt = None
# plain try catch for when pandas will raise in the future
# TODO we can tighten the exception type in future when it does
try:
with warnings.catch_warnings():
# pandas 2.x does not yet enforce this behaviour but raises a
# warning -> we want to suppress this warning for our users,
# and do this by turning it into an error so we take the
# `except` code path to try again with utc=True
warnings.filterwarnings(
"error",
"In a future version of pandas, parsing datetimes with "
"mixed time zones will raise an error",
FutureWarning,
)
as_dt = pd.to_datetime(df[k])
except Exception:
pass
if as_dt is None or as_dt.dtype == "object":
# if to_datetime failed, try again for mixed timezone offsets
# This can still fail if there are invalid datetimes
as_dt = pd.to_datetime(df[k], errors="ignore", utc=True)
try:
as_dt = pd.to_datetime(df[k], utc=True)
except Exception:
pass
# if to_datetime succeeded, round datetimes as
# fiona only supports up to ms precision (any microseconds are
# floating point rounding error)
if not (as_dt.dtype == "object"):
df[k] = as_dt.dt.round(freq="ms")
if as_dt is not None and not (as_dt.dtype == "object"):
if PANDAS_GE_20:
df[k] = as_dt.dt.as_unit("ms")
else:
df[k] = as_dt.dt.round(freq="ms")
return df
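
A minimal sketch of the pandas behaviour the try/except above guards against (on pandas 2.x, mixed UTC offsets produce a FutureWarning and an object-dtype result unless utc=True is passed):

import pandas as pd

s = pd.Series(["2020-01-01T00:00:00+01:00", "2020-06-01T00:00:00+05:00"])
pd.to_datetime(s)            # FutureWarning, object dtype on pandas 2.x
pd.to_datetime(s, utc=True)  # datetime64[ns, UTC]
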
@@ -428,48 +474,79 @@ def _read_file_pyogrio(path_or_bytes, bbox=None, mask=None, rows=None, **kwargs)
raise ValueError("slice with step is not supported")
else:
raise TypeError("'rows' must be an integer or a slice.")
if bbox is not None and mask is not None:
# match error message from Fiona
raise ValueError("mask and bbox can not be set together")
if bbox is not None:
if isinstance(bbox, (GeoDataFrame, GeoSeries)):
bbox = tuple(bbox.total_bounds)
crs = pyogrio.read_info(path_or_bytes).get("crs")
if isinstance(path_or_bytes, IOBase):
path_or_bytes.seek(0)
bbox = tuple(bbox.to_crs(crs).total_bounds)
elif isinstance(bbox, BaseGeometry):
bbox = bbox.bounds
if len(bbox) != 4:
raise ValueError("'bbox' should be a length-4 tuple.")
if mask is not None:
raise ValueError(
"The 'mask' keyword is not supported with the 'pyogrio' engine. "
"You can use 'bbox' instead."
)
# NOTE: mask cannot be used at the same time as the bbox keyword
if isinstance(mask, (GeoDataFrame, GeoSeries)):
crs = pyogrio.read_info(path_or_bytes).get("crs")
if isinstance(path_or_bytes, IOBase):
path_or_bytes.seek(0)
mask = shapely.unary_union(mask.to_crs(crs).geometry.values)
elif isinstance(mask, BaseGeometry):
mask = shapely.unary_union(mask)
elif isinstance(mask, dict) or hasattr(mask, "__geo_interface__"):
# convert GeoJSON to shapely geometry
mask = shapely.geometry.shape(mask)
kwargs["mask"] = mask
if kwargs.pop("ignore_geometry", False):
kwargs["read_geometry"] = False
# TODO: if bbox is not None, check its CRS vs the CRS of the file
# translate `ignore_fields`/`include_fields` keyword for back compat with fiona
if "ignore_fields" in kwargs and "include_fields" in kwargs:
raise ValueError("Cannot specify both 'ignore_fields' and 'include_fields'")
elif "ignore_fields" in kwargs:
if kwargs.get("columns", None) is not None:
raise ValueError(
"Cannot specify both 'columns' and 'ignore_fields' keywords"
)
warnings.warn(
"The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
"will be removed in a future release. You can use the 'columns' keyword "
"instead to select which columns to read.",
DeprecationWarning,
stacklevel=3,
)
ignore_fields = kwargs.pop("ignore_fields")
fields = pyogrio.read_info(path_or_bytes)["fields"]
include_fields = [col for col in fields if col not in ignore_fields]
kwargs["columns"] = include_fields
elif "include_fields" in kwargs:
# translate `include_fields` keyword for back compat with fiona engine
if kwargs.get("columns", None) is not None:
raise ValueError(
"Cannot specify both 'columns' and 'include_fields' keywords"
)
warnings.warn(
"The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
"will be removed in a future release. You can use the 'columns' keyword "
"instead to select which columns to read.",
DeprecationWarning,
stacklevel=3,
)
kwargs["columns"] = kwargs.pop("include_fields")
return pyogrio.read_dataframe(path_or_bytes, bbox=bbox, **kwargs)
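
Usage sketch for the keywords handled above (path and column names hypothetical; bbox and mask remain mutually exclusive, and mask support needs a recent pyogrio):

import geopandas
from shapely.geometry import box

gdf = geopandas.read_file("roads.gpkg", columns=["name", "type"], rows=100)
gdf = geopandas.read_file("roads.gpkg", bbox=(0, 0, 10, 10))
gdf = geopandas.read_file("roads.gpkg", mask=box(0, 0, 10, 10))
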
def read_file(*args, **kwargs):
warnings.warn(
"geopandas.io.file.read_file() is intended for internal "
"use only, and will be deprecated. Use geopandas.read_file() instead.",
FutureWarning,
stacklevel=2,
)
return _read_file(*args, **kwargs)
def to_file(*args, **kwargs):
warnings.warn(
"geopandas.io.file.to_file() is intended for internal "
"use only, and will be deprecated. Use GeoDataFrame.to_file() "
"or GeoSeries.to_file() instead.",
FutureWarning,
stacklevel=2,
)
return _to_file(*args, **kwargs)
def _detect_driver(path):
"""
Attempt to auto-detect driver based on the extension
@@ -497,25 +574,16 @@ def _to_file(
mode="w",
crs=None,
engine=None,
metadata=None,
**kwargs,
):
"""
Write this GeoDataFrame to an OGR data source
A dictionary of supported OGR providers is available via:
>>> import fiona
>>> fiona.supported_drivers # doctest: +SKIP
.. note::
GeoPandas currently defaults to use Fiona as the engine in ``to_file``.
However, GeoPandas 1.0 will switch to use pyogrio as the default engine, since
pyogrio can provide a significant speedup compared to Fiona. We recommend to
already install pyogrio and specify the engine by using the ``engine`` keyword
(``df.to_file(..., engine="pyogrio")``), or by setting the default for
the ``engine`` keyword globally with::
geopandas.options.io_engine = "pyogrio"
>>> import pyogrio
>>> pyogrio.list_drivers() # doctest: +SKIP
Parameters
----------
@@ -557,10 +625,15 @@ def _to_file(
The value can be anything accepted
by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
such as an authority string (eg "EPSG:4326") or a WKT string.
engine : str, "fiona" or "pyogrio"
The underlying library that is used to write the file. Currently, the
supported options are "fiona" and "pyogrio". Defaults to "fiona" if
installed, otherwise tries "pyogrio".
engine : str, "pyogrio" or "fiona"
The underlying library that is used to write the file. Currently, the
supported options are "pyogrio" and "fiona". Defaults to "pyogrio" if
installed, otherwise tries "fiona". Engine can also be set globally
with the ``geopandas.options.io_engine`` option.
metadata : dict[str, str], default None
Optional metadata to be stored in the file. Keys and values must be
strings. Only supported for the "GPKG" driver
(requires Fiona >= 1.9 or pyogrio >= 0.6).
**kwargs :
Keyword args to be passed to the engine, and can be used to write
to multi-layer data, store data within archives (zip files), etc.
@@ -604,44 +677,57 @@ def _to_file(
"to a supported format like a well-known text (WKT) using "
"`GeoSeries.to_wkt()`.",
)
_check_metadata_supported(metadata, engine, driver)
if mode not in ("w", "a"):
raise ValueError(f"'mode' should be one of 'w' or 'a', got '{mode}' instead")
if engine == "fiona":
_to_file_fiona(df, filename, driver, schema, crs, mode, **kwargs)
elif engine == "pyogrio":
_to_file_pyogrio(df, filename, driver, schema, crs, mode, **kwargs)
if engine == "pyogrio":
_to_file_pyogrio(df, filename, driver, schema, crs, mode, metadata, **kwargs)
elif engine == "fiona":
_to_file_fiona(df, filename, driver, schema, crs, mode, metadata, **kwargs)
else:
raise ValueError(f"unknown engine '{engine}'")
def _to_file_fiona(df, filename, driver, schema, crs, mode, **kwargs):
def _to_file_fiona(df, filename, driver, schema, crs, mode, metadata, **kwargs):
if not HAS_PYPROJ and crs:
raise ImportError(
"The 'pyproj' package is required to write a file with a CRS, but it is not"
" installed or does not import correctly."
)
if schema is None:
schema = infer_schema(df)
if crs:
crs = pyproj.CRS.from_user_input(crs)
from pyproj import CRS
crs = CRS.from_user_input(crs)
else:
crs = df.crs
with fiona_env():
crs_wkt = None
try:
gdal_version = fiona.env.get_gdal_release_name()
except AttributeError:
gdal_version = "2.0.0" # just assume it is not the latest
if Version(gdal_version) >= Version("3.0.0") and crs:
gdal_version = Version(
fiona.env.get_gdal_release_name().strip("e")
) # GH3147
except (AttributeError, ValueError):
gdal_version = Version("2.0.0") # just assume it is not the latest
if gdal_version >= Version("3.0.0") and crs:
crs_wkt = crs.to_wkt()
elif crs:
crs_wkt = crs.to_wkt("WKT1_GDAL")
with fiona.open(
filename, mode=mode, driver=driver, crs_wkt=crs_wkt, schema=schema, **kwargs
) as colxn:
if metadata is not None:
colxn.update_tags(metadata)
colxn.writerecords(df.iterfeatures())
def _to_file_pyogrio(df, filename, driver, schema, crs, mode, **kwargs):
def _to_file_pyogrio(df, filename, driver, schema, crs, mode, metadata, **kwargs):
import pyogrio
if schema is not None:
@@ -653,13 +739,13 @@ def _to_file_pyogrio(df, filename, driver, schema, crs, mode, **kwargs):
kwargs["append"] = True
if crs is not None:
raise ValueError("Passing 'crs' it not supported with the 'pyogrio' engine.")
raise ValueError("Passing 'crs' is not supported with the 'pyogrio' engine.")
# for the fiona engine, this check is done in gdf.iterfeatures()
if not df.columns.is_unique:
raise ValueError("GeoDataFrame cannot contain duplicated column names.")
pyogrio.write_dataframe(df, filename, driver=driver, **kwargs)
pyogrio.write_dataframe(df, filename, driver=driver, metadata=metadata, **kwargs)
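
Usage sketch for the new metadata keyword (GPKG only, per _check_metadata_supported; file name hypothetical):

gdf.to_file("boroughs.gpkg", driver="GPKG", metadata={"title": "NYC boroughs"})
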
def infer_schema(df):
@@ -732,3 +818,34 @@ def _geometry_types(df):
geom_types = geom_types[0]
return geom_types
def _list_layers(filename) -> pd.DataFrame:
"""List layers available in a file.
Provides an overview of layers available in a file or URL together with their
geometry types. When supported by the data source, this includes both spatial and
non-spatial layers. Non-spatial layers are indicated by the ``"geometry_type"``
column being ``None``. GeoPandas will not read such layers but they can be read into
a pd.DataFrame using :func:`pyogrio.read_dataframe`.
Parameters
----------
filename : str, path object or file-like object
Either the absolute or relative path to the file or URL to
be opened, or any object with a read() method (such as an open file
or StringIO)
Returns
-------
pandas.DataFrame
A DataFrame with columns "name" and "geometry_type" and one row per layer.
"""
_import_pyogrio()
_check_pyogrio("list_layers")
import pyogrio
return pd.DataFrame(
pyogrio.list_layers(filename), columns=["name", "geometry_type"]
)
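
Usage sketch (file and layer names hypothetical); non-spatial layers report a geometry_type of None:

import geopandas

geopandas.list_layers("data.gpkg")
#           name geometry_type
# 0       roads    LineString
# 1  attributes          None
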


@@ -1,5 +1,6 @@
import warnings
from contextlib import contextmanager
from functools import lru_cache
import pandas as pd
@@ -8,8 +9,6 @@ import shapely.wkb
from geopandas import GeoDataFrame
from geopandas import _compat as compat
@contextmanager
def _get_conn(conn_or_engine):
@@ -28,7 +27,7 @@ def _get_conn(conn_or_engine):
-------
Connection
"""
from sqlalchemy.engine.base import Engine, Connection
from sqlalchemy.engine.base import Connection, Engine
if isinstance(conn_or_engine, Connection):
if not conn_or_engine.in_transaction():
@@ -43,7 +42,7 @@ def _get_conn(conn_or_engine):
raise ValueError(f"Unknown Connectable: {conn_or_engine}")
def _df_to_geodf(df, geom_col="geom", crs=None):
def _df_to_geodf(df, geom_col="geom", crs=None, con=None):
"""
Transforms a pandas DataFrame into a GeoDataFrame.
The column 'geom_col' must be a geometry column in WKB representation.
@@ -60,6 +59,8 @@ def _df_to_geodf(df, geom_col="geom", crs=None):
such as an authority string (eg "EPSG:4326") or a WKT string.
If not set, tries to determine CRS from the SRID associated with the
first geometry in the database, and assigns that to all geometries.
con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
Active connection to the database to query.
Returns
-------
GeoDataFrame
@@ -80,10 +81,6 @@ def _df_to_geodf(df, geom_col="geom", crs=None):
load_geom_bytes = shapely.wkb.loads
"""Load from Python 3 binary."""
def load_geom_buffer(x):
"""Load from Python 2 binary."""
return shapely.wkb.loads(str(x))
def load_geom_text(x):
"""Load from binary encoded as text."""
return shapely.wkb.loads(str(x), hex=True)
@@ -95,13 +92,31 @@ def _df_to_geodf(df, geom_col="geom", crs=None):
df[geom_col] = geoms = geoms.apply(load_geom)
if crs is None:
if compat.SHAPELY_GE_20:
srid = shapely.get_srid(geoms.iat[0])
else:
srid = shapely.geos.lgeos.GEOSGetSRID(geoms.iat[0]._geom)
srid = shapely.get_srid(geoms.iat[0])
# if no defined SRID in geodatabase, returns SRID of 0
if srid != 0:
crs = "epsg:{}".format(srid)
try:
spatial_ref_sys_df = _get_spatial_ref_sys_df(con, srid)
except pd.errors.DatabaseError:
warning_msg = (
f"Could not find the spatial reference system table "
f"(spatial_ref_sys) in PostGIS."
f"Trying epsg:{srid} as a fallback."
)
warnings.warn(warning_msg, UserWarning, stacklevel=3)
crs = "epsg:{}".format(srid)
else:
if not spatial_ref_sys_df.empty:
auth_name = spatial_ref_sys_df["auth_name"].item()
crs = f"{auth_name}:{srid}"
else:
warning_msg = (
f"Could not find srid {srid} in the "
f"spatial_ref_sys table. "
f"Trying epsg:{srid} as a fallback."
)
warnings.warn(warning_msg, UserWarning, stacklevel=3)
crs = "epsg:{}".format(srid)
return GeoDataFrame(df, crs=crs, geometry=geom_col)
@@ -176,7 +191,7 @@ def _read_postgis(
params=params,
chunksize=chunksize,
)
return _df_to_geodf(df, geom_col=geom_col, crs=crs)
return _df_to_geodf(df, geom_col=geom_col, crs=crs, con=con)
else:
# read data in chunks and return a generator
@@ -189,20 +204,9 @@ def _read_postgis(
params=params,
chunksize=chunksize,
)
return (_df_to_geodf(df, geom_col=geom_col, crs=crs) for df in df_generator)
def read_postgis(*args, **kwargs):
import warnings
warnings.warn(
"geopandas.io.sql.read_postgis() is intended for internal "
"use only, and will be deprecated. Use geopandas.read_postgis() instead.",
FutureWarning,
stacklevel=2,
)
return _read_postgis(*args, **kwargs)
return (
_df_to_geodf(df, geom_col=geom_col, crs=crs, con=con) for df in df_generator
)
def _get_geometry_type(gdf):
@@ -253,7 +257,7 @@ def _get_geometry_type(gdf):
def _get_srid_from_crs(gdf):
"""
Get EPSG code from CRS if available. If not, return -1.
Get EPSG code from CRS if available. If not, return 0.
"""
# Use geoalchemy2 default for srid
@@ -279,7 +283,7 @@ def _get_srid_from_crs(gdf):
warnings.warn(warning_msg, UserWarning, stacklevel=2)
if srid is None:
srid = -1
srid = 0
warnings.warn(warning_msg, UserWarning, stacklevel=2)
return srid
@@ -288,8 +292,8 @@ def _get_srid_from_crs(gdf):
def _convert_linearring_to_linestring(gdf, geom_name):
from shapely.geometry import LineString
# Todo: Use Pygeos function once it's implemented:
# https://github.com/pygeos/pygeos/issues/76
# Todo: Use shapely function once it's implemented:
# https://github.com/shapely/shapely/issues/1617
mask = gdf.geom_type == "LinearRing"
gdf.loc[mask, geom_name] = gdf.loc[mask, geom_name].apply(
@@ -300,26 +304,11 @@ def _convert_linearring_to_linestring(gdf, geom_name):
def _convert_to_ewkb(gdf, geom_name, srid):
"""Convert geometries to ewkb."""
if compat.USE_SHAPELY_20:
geoms = shapely.to_wkb(
shapely.set_srid(gdf[geom_name].values._data, srid=srid),
hex=True,
include_srid=True,
)
elif compat.USE_PYGEOS:
from pygeos import set_srid, to_wkb
geoms = to_wkb(
set_srid(gdf[geom_name].values._data, srid=srid),
hex=True,
include_srid=True,
)
else:
from shapely.wkb import dumps
geoms = [dumps(geom, srid=srid, hex=True) for geom in gdf[geom_name]]
geoms = shapely.to_wkb(
shapely.set_srid(gdf[geom_name].values._data, srid=srid),
hex=True,
include_srid=True,
)
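
A minimal sketch of the shapely 2.x EWKB path that replaces the removed pygeos/shapely-1.x branches:

import shapely

geom = shapely.set_srid(shapely.Point(1.0, 2.0), 4326)
ewkb = shapely.to_wkb(geom, hex=True, include_srid=True)
# '0101000020E6100000...' -- the 0x20 type flag marks the embedded SRID (4326)
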
# The gdf will warn that the geometry column doesn't hold in-memory geometries
# now that they are EWKB, so convert back to a regular dataframe to avoid warning
@@ -330,8 +319,8 @@ def _convert_to_ewkb(gdf, geom_name, srid):
def _psql_insert_copy(tbl, conn, keys, data_iter):
import io
import csv
import io
s_buf = io.StringIO()
writer = csv.writer(s_buf)
@@ -341,11 +330,16 @@ def _psql_insert_copy(tbl, conn, keys, data_iter):
columns = ", ".join('"{}"'.format(k) for k in keys)
dbapi_conn = conn.connection
sql = 'COPY "{}"."{}" ({}) FROM STDIN WITH CSV'.format(
tbl.table.schema, tbl.table.name, columns
)
with dbapi_conn.cursor() as cur:
sql = 'COPY "{}"."{}" ({}) FROM STDIN WITH CSV'.format(
tbl.table.schema, tbl.table.name, columns
)
cur.copy_expert(sql=sql, file=s_buf)
# Use psycopg method if it's available
if hasattr(cur, "copy") and callable(cur.copy):
with cur.copy(sql) as copy:
copy.write(s_buf.read())
else: # otherwise use psycopg2 method
cur.copy_expert(sql, s_buf)
def _write_postgis(
@@ -469,3 +463,11 @@ def _write_postgis(
dtype=dtype,
method=_psql_insert_copy,
)
@lru_cache
def _get_spatial_ref_sys_df(con, srid):
spatial_ref_sys_sql = (
f"SELECT srid, auth_name FROM spatial_ref_sys WHERE srid = {srid}"
)
return pd.read_sql(spatial_ref_sys_sql, con)
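
A sketch of the lookup this enables (con assumed to be an open SQLAlchemy connection/engine; srid 102008 is a hypothetical non-EPSG code, which is exactly the case the auth_name column handles):

import pandas as pd

srid = 102008  # e.g. an ESRI code registered in spatial_ref_sys
ref = pd.read_sql(f"SELECT srid, auth_name FROM spatial_ref_sys WHERE srid = {srid}", con)
crs = f"{ref['auth_name'].item()}:{srid}" if not ref.empty else f"epsg:{srid}"
# -> "ESRI:102008" instead of the incorrect "epsg:102008"
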


@@ -19,6 +19,7 @@ pickles and test versus the current data that is generated
(with master). These are then compared.
"""
import os
import pickle
import platform
@@ -26,9 +27,10 @@ import sys
import pandas as pd
import geopandas
from shapely.geometry import Point
import geopandas
def create_pickle_data():
"""create the pickle data"""


@@ -1,33 +1,41 @@
import datetime
import io
import json
import os
import pathlib
import shutil
import tempfile
from collections import OrderedDict
from packaging.version import Version
import numpy as np
import pandas as pd
import pytest
import pytz
from packaging.version import Version
from pandas.api.types import is_datetime64_any_dtype
from pandas.testing import assert_series_equal
from shapely.geometry import Point, Polygon, box
from shapely.geometry import Point, Polygon, box, mapping
import geopandas
from geopandas import GeoDataFrame, read_file
from geopandas._compat import PANDAS_GE_20
from geopandas.io.file import _detect_driver, _EXTENSION_TO_DRIVER
from geopandas._compat import HAS_PYPROJ, PANDAS_GE_20, PANDAS_GE_30
from geopandas.io.file import _EXTENSION_TO_DRIVER, _detect_driver
import pytest
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
from geopandas.tests.util import PACKAGE_DIR, validate_boro_df
from pandas.testing import assert_frame_equal, assert_series_equal
try:
import pyogrio
PYOGRIO_GE_07 = Version(pyogrio.__version__) > Version("0.6.0")
# those version checks have to be defined here instead of imported from
# geopandas.io.file (those are only initialized lazily on first usage)
PYOGRIO_GE_090 = Version(Version(pyogrio.__version__).base_version) >= Version(
"0.9.0"
)
except ImportError:
pyogrio = False
PYOGRIO_GE_07 = False
PYOGRIO_GE_090 = False
try:
@@ -46,6 +54,9 @@ FIONA_MARK = pytest.mark.skipif(not fiona, reason="fiona not installed")
_CRS = "epsg:4326"
pytestmark = pytest.mark.filterwarnings("ignore:Value:RuntimeWarning:pyogrio")
@pytest.fixture(
params=[
pytest.param("fiona", marks=FIONA_MARK),
@@ -62,9 +73,8 @@ def skip_pyogrio_not_supported(engine):
@pytest.fixture
def df_nybb(engine):
nybb_path = geopandas.datasets.get_path("nybb")
df = read_file(nybb_path, engine=engine)
def df_nybb(engine, nybb_filename):
df = read_file(nybb_filename, engine=engine)
return df
@@ -130,7 +140,7 @@ def test_to_file(tmpdir, df_nybb, df_null, driver, ext, engine):
df = GeoDataFrame.from_file(tempfilename, engine=engine)
assert "geometry" in df
assert len(df) == 5
assert np.alltrue(df["BoroName"].values == df_nybb["BoroName"])
assert np.all(df["BoroName"].values == df_nybb["BoroName"])
# Write layer with null geometry out to file
tempfilename = os.path.join(str(tmpdir), "null_geom" + ext)
@@ -139,7 +149,7 @@ def test_to_file(tmpdir, df_nybb, df_null, driver, ext, engine):
df = GeoDataFrame.from_file(tempfilename, engine=engine)
assert "geometry" in df
assert len(df) == 2
assert np.alltrue(df["Name"].values == df_null["Name"])
assert np.all(df["Name"].values == df_null["Name"])
# check the expected driver
assert_correct_driver(tempfilename, ext, engine)
@@ -153,7 +163,7 @@ def test_to_file_pathlib(tmpdir, df_nybb, driver, ext, engine):
df = GeoDataFrame.from_file(temppath, engine=engine)
assert "geometry" in df
assert len(df) == 5
assert np.alltrue(df["BoroName"].values == df_nybb["BoroName"])
assert np.all(df["BoroName"].values == df_nybb["BoroName"])
# check the expected driver
assert_correct_driver(temppath, ext, engine)
@@ -174,9 +184,10 @@ def test_to_file_bool(tmpdir, driver, ext, engine):
result = read_file(tempfilename, engine=engine)
if ext in (".shp", ""):
# Shapefile does not support boolean, so is read back as int
if engine == "fiona":
# but since GDAL 3.9 supports boolean fields in SHP
if engine == "fiona" and fiona.gdal_version.minor < 9:
df["col"] = df["col"].astype("int64")
else:
elif engine == "pyogrio" and pyogrio.__gdal_version__ < (3, 9):
df["col"] = df["col"].astype("int32")
assert_geodataframe_equal(result, df)
# check the expected driver
@@ -189,15 +200,15 @@ eastern = pytz.timezone("America/New_York")
datetime_type_tests = (TEST_DATE, eastern.localize(TEST_DATE))
@pytest.mark.filterwarnings(
"ignore:Non-conformant content for record 1 in column b:RuntimeWarning"
) # for GPKG, GDAL writes the tz data but warns on reading (see DATETIME_FORMAT option)
@pytest.mark.parametrize(
"time", datetime_type_tests, ids=("naive_datetime", "datetime_with_timezone")
)
@pytest.mark.parametrize("driver,ext", driver_ext_pairs)
def test_to_file_datetime(tmpdir, driver, ext, time, engine):
"""Test writing a data file with the datetime column type"""
if engine == "pyogrio" and time.tzinfo is not None:
# TODO
pytest.skip("pyogrio doesn't yet support timezones")
if ext in (".shp", ""):
pytest.skip(f"Driver corresponding to ext {ext} doesn't support dt fields")
@@ -207,23 +218,25 @@ def test_to_file_datetime(tmpdir, driver, ext, time, engine):
df = GeoDataFrame(
{"a": [1.0, 2.0], "b": [time, time]}, geometry=[point, point], crs=4326
)
fiona_precision_limit = "ms"
df["b"] = df["b"].dt.round(freq=fiona_precision_limit)
df["b"] = df["b"].dt.round(freq="ms")
df.to_file(tempfilename, driver=driver, engine=engine)
df_read = read_file(tempfilename, engine=engine)
assert_geodataframe_equal(df.drop(columns=["b"]), df_read.drop(columns=["b"]))
# Check datetime column
expected = df["b"]
if PANDAS_GE_20:
expected = df["b"].dt.as_unit("ms")
actual = df_read["b"]
if df["b"].dt.tz is not None:
# US/Eastern becomes pytz.FixedOffset(-300) when read from file
# so compare fairly in terms of UTC
assert_series_equal(
df["b"].dt.tz_convert(pytz.utc), df_read["b"].dt.tz_convert(pytz.utc)
)
else:
if engine == "pyogrio" and PANDAS_GE_20:
df["b"] = df["b"].astype("datetime64[ms]")
assert_series_equal(df["b"], df_read["b"])
# as GDAL only models offsets, not timezones.
# Compare fairly in terms of UTC instead
expected = expected.dt.tz_convert(pytz.utc)
actual = actual.dt.tz_convert(pytz.utc)
assert_series_equal(expected, actual)
dt_exts = ["gpkg", "geojson"]
@@ -239,7 +252,7 @@ def write_invalid_date_file(date_str, tmpdir, ext, engine):
)
# Schema not required for GeoJSON since not typed, but needed for GPKG
if ext == "geojson":
df.to_file(tempfilename)
df.to_file(tempfilename, engine=engine)
else:
schema = {"geometry": "Point", "properties": {"date": "datetime"}}
if engine == "pyogrio" and not fiona:
@@ -254,7 +267,7 @@ def test_read_file_datetime_invalid(tmpdir, ext, engine):
# https://github.com/geopandas/geopandas/issues/2502
date_str = "9999-99-99T00:00:00" # invalid date handled by GDAL
tempfilename = write_invalid_date_file(date_str, tmpdir, ext, engine)
res = read_file(tempfilename)
res = read_file(tempfilename, engine=engine)
if ext == "gpkg":
assert is_datetime64_any_dtype(res["date"])
assert pd.isna(res["date"].iloc[-1])
@@ -265,16 +278,19 @@ def test_read_file_datetime_invalid(tmpdir, ext, engine):
@pytest.mark.parametrize("ext", dt_exts)
def test_read_file_datetime_out_of_bounds_ns(tmpdir, ext, engine):
if engine == "pyogrio" and not PANDAS_GE_20:
pytest.skip("with pyogrio requires pandas >= 2.0 to pass")
# https://github.com/geopandas/geopandas/issues/2502
if ext == "geojson":
skip_pyogrio_not_supported(engine)
date_str = "9999-12-31T00:00:00" # valid to GDAL, not to [ns] format
tempfilename = write_invalid_date_file(date_str, tmpdir, ext, engine)
res = read_file(tempfilename)
# Pandas invalid datetimes are read in as object dtype (strings)
assert res["date"].dtype == "object"
assert isinstance(res["date"].iloc[0], str)
res = read_file(tempfilename, engine=engine)
if PANDAS_GE_30:
assert res["date"].dtype == "datetime64[ms]"
assert res["date"].iloc[-1] == pd.Timestamp("9999-12-31 00:00:00")
else:
# Pandas invalid datetimes are read in as object dtype (strings)
assert res["date"].dtype == "object"
assert isinstance(res["date"].iloc[0], str)
def test_read_file_datetime_mixed_offsets(tmpdir):
@@ -292,17 +308,13 @@ def test_read_file_datetime_mixed_offsets(tmpdir):
df.to_file(tempfilename)
# check mixed tz don't crash GH2478
res = read_file(tempfilename)
if engine == "fiona":
# Convert mixed timezones to UTC equivalent
assert is_datetime64_any_dtype(res["date"])
if not PANDAS_GE_20:
utc = pytz.utc
else:
utc = datetime.timezone.utc
assert res["date"].dt.tz == utc
# Convert mixed timezones to UTC equivalent
assert is_datetime64_any_dtype(res["date"])
if not PANDAS_GE_20:
utc = pytz.utc
else:
# old fiona and pyogrio ignore timezones and read as datetimes successfully
assert is_datetime64_any_dtype(res["date"])
utc = datetime.timezone.utc
assert res["date"].dt.tz == utc
@pytest.mark.parametrize("driver,ext", driver_ext_pairs)
@@ -365,14 +377,21 @@ def test_to_file_int32(tmpdir, df_points, engine, driver, ext):
df = GeoDataFrame(geometry=geometry)
df["data"] = pd.array([1, np.nan] * 5, dtype=pd.Int32Dtype())
df.to_file(tempfilename, driver=driver, engine=engine)
df_read = GeoDataFrame.from_file(tempfilename, driver=driver, engine=engine)
assert_geodataframe_equal(df_read, df, check_dtype=False, check_like=True)
df_read = GeoDataFrame.from_file(tempfilename, engine=engine)
# the int column with missing values comes back as float
expected = df.copy()
expected["data"] = expected["data"].astype("float64")
assert_geodataframe_equal(df_read, expected, check_like=True)
tempfilename2 = os.path.join(str(tmpdir), f"int32_2.{ext}")
df2 = df.dropna()
df2.to_file(tempfilename2, driver=driver, engine=engine)
df2_read = GeoDataFrame.from_file(tempfilename2, engine=engine)
if engine == "pyogrio":
tempfilename2 = os.path.join(str(tmpdir), f"int32_2.{ext}")
df2 = df.dropna()
df2.to_file(tempfilename2, driver=driver, engine=engine)
df2_read = GeoDataFrame.from_file(tempfilename2, driver=driver, engine=engine)
assert df2_read["data"].dtype == "int32"
else:
# with the fiona engine the 32 bitwidth is not preserved
assert df2_read["data"].dtype == "int64"
@pytest.mark.parametrize("driver,ext", driver_ext_pairs)
@@ -382,8 +401,11 @@ def test_to_file_int64(tmpdir, df_points, engine, driver, ext):
df = GeoDataFrame(geometry=geometry)
df["data"] = pd.array([1, np.nan] * 5, dtype=pd.Int64Dtype())
df.to_file(tempfilename, driver=driver, engine=engine)
df_read = GeoDataFrame.from_file(tempfilename, driver=driver, engine=engine)
assert_geodataframe_equal(df_read, df, check_dtype=False, check_like=True)
df_read = GeoDataFrame.from_file(tempfilename, engine=engine)
# the int column with missing values comes back as float
expected = df.copy()
expected["data"] = expected["data"].astype("float64")
assert_geodataframe_equal(df_read, expected, check_like=True)
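A short sketch (not part of the diff) of why the `expected` frames above cast to float64: OGR stores NULL for the missing entries, and a plain numpy-backed read has no integer NA, so nullable integer columns round-trip as floats:

import numpy as np
import pandas as pd

s = pd.Series(pd.array([1, np.nan] * 5, dtype=pd.Int64Dtype()))
print(s.dtype)                    # Int64 (nullable extension dtype)
print(s.astype("float64").dtype)  # float64; <NA> becomes NaN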
def test_to_file_empty(tmpdir, engine):
@@ -393,12 +415,6 @@ def test_to_file_empty(tmpdir, engine):
input_empty_df.to_file(tempfilename, engine=engine)
def test_to_file_privacy(tmpdir, df_nybb):
tempfilename = os.path.join(str(tmpdir), "test.shp")
with pytest.warns(FutureWarning):
geopandas.io.file.to_file(df_nybb, tempfilename)
def test_to_file_schema(tmpdir, df_nybb, engine):
"""
Ensure that the file is written according to the schema
@@ -431,12 +447,13 @@ def test_to_file_schema(tmpdir, df_nybb, engine):
assert result_schema == schema
def test_to_file_crs(tmpdir, engine):
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
def test_to_file_crs(tmpdir, engine, nybb_filename):
"""
Ensure that the file is written according to the crs
if it is specified
"""
df = read_file(geopandas.datasets.get_path("nybb"), engine=engine)
df = read_file(nybb_filename, engine=engine)
tempfilename = os.path.join(str(tmpdir), "crs.shp")
# save correct CRS
@@ -445,7 +462,7 @@ def test_to_file_crs(tmpdir, engine):
assert result.crs == df.crs
if engine == "pyogrio":
with pytest.raises(ValueError, match="Passing 'crs' it not supported"):
with pytest.raises(ValueError, match="Passing 'crs' is not supported"):
df.to_file(tempfilename, crs=3857, engine=engine)
return
@@ -455,8 +472,7 @@ def test_to_file_crs(tmpdir, engine):
assert result.crs == "epsg:3857"
# specify CRS for gdf without one
df2 = df.copy()
df2.crs = None
df2 = df.set_crs(None, allow_override=True)
df2.to_file(tempfilename, crs=2263, engine=engine)
df = GeoDataFrame.from_file(tempfilename, engine=engine)
assert df.crs == "epsg:2263"
@@ -529,6 +545,7 @@ def test_mode_unsupported(tmpdir, df_nybb, engine):
df_nybb.to_file(tempfilename, mode="r", engine=engine)
@pytest.mark.filterwarnings("ignore:'crs' was not provided:UserWarning:pyogrio")
@pytest.mark.parametrize("driver,ext", driver_ext_pairs)
def test_empty_crs(tmpdir, driver, ext, engine):
"""Test handling of undefined CRS with GPKG driver (GH #1975)."""
@@ -548,7 +565,7 @@ def test_empty_crs(tmpdir, driver, ext, engine):
if ext == ".geojson":
# geojson by default assumes epsg:4326
df.crs = "EPSG:4326"
df.geometry.array.crs = "EPSG:4326"
assert_geodataframe_equal(result, df)
@@ -561,10 +578,11 @@ def test_empty_crs(tmpdir, driver, ext, engine):
NYBB_CRS = "epsg:2263"
def test_read_file(engine):
df = read_file(geopandas.datasets.get_path("nybb"), engine=engine)
def test_read_file(engine, nybb_filename):
df = read_file(nybb_filename, engine=engine)
validate_boro_df(df)
assert df.crs == NYBB_CRS
if HAS_PYPROJ:
assert df.crs == NYBB_CRS
expected_columns = ["BoroCode", "BoroName", "Shape_Leng", "Shape_Area"]
assert (df.columns[:-1] == expected_columns).all()
@@ -578,7 +596,7 @@ def test_read_file(engine):
"main/geopandas/tests/data/null_geom.geojson",
# url to zip file
"https://raw.githubusercontent.com/geopandas/geopandas/"
"main/geopandas/datasets/nybb_16a.zip",
"main/geopandas/tests/data/nybb_16a.zip",
# url to zipfile without extension
"https://geonode.goosocean.org/download/480",
# url to web service
@@ -596,6 +614,25 @@ def test_read_file_local_uri(file_path, engine):
assert isinstance(gdf, geopandas.GeoDataFrame)
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
def test_read_file_geojson_string_path(engine):
if engine == "pyogrio" and not PYOGRIO_GE_090:
pytest.skip("fixed in pyogrio 0.9.0")
expected = GeoDataFrame({"val_with_hash": ["row # 0"], "geometry": [Point(0, 1)]})
features = {
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {"val_with_hash": "row # 0"},
"geometry": {"type": "Point", "coordinates": [0.0, 1.0]},
}
],
}
df_read = read_file(json.dumps(features))
assert_geodataframe_equal(expected.set_crs("EPSG:4326"), df_read)
def test_read_file_textio(file_path, engine):
file_text_stream = open(file_path)
file_stringio = io.StringIO(open(file_path).read())
@@ -648,11 +685,11 @@ def test_read_file_tempfile(engine):
temp.close()
def test_read_binary_file_fsspec(engine):
def test_read_binary_file_fsspec(engine, nybb_filename):
fsspec = pytest.importorskip("fsspec")
# Remove the zip scheme so fsspec doesn't open as a zipped file,
# instead we want to read as bytes and let fiona decode it.
path = geopandas.datasets.get_path("nybb")[6:]
path = nybb_filename[6:]
with fsspec.open(path, "rb") as f:
gdf = read_file(f, engine=engine)
assert isinstance(gdf, geopandas.GeoDataFrame)
@@ -665,10 +702,10 @@ def test_read_text_file_fsspec(file_path, engine):
assert isinstance(gdf, geopandas.GeoDataFrame)
def test_infer_zipped_file(engine):
def test_infer_zipped_file(engine, nybb_filename):
# Remove the zip scheme so that the test for a zipped file can
# check it and add it back.
path = geopandas.datasets.get_path("nybb")[6:]
path = nybb_filename[6:]
gdf = read_file(path, engine=engine)
assert isinstance(gdf, geopandas.GeoDataFrame)
@@ -683,15 +720,24 @@ def test_infer_zipped_file(engine):
assert isinstance(gdf, geopandas.GeoDataFrame)
def test_allow_legacy_gdal_path(engine):
def test_allow_legacy_gdal_path(engine, nybb_filename):
# Construct a GDAL-style zip path.
path = "/vsizip/" + geopandas.datasets.get_path("nybb")[6:]
path = "/vsizip/" + nybb_filename[6:]
gdf = read_file(path, engine=engine)
assert isinstance(gdf, geopandas.GeoDataFrame)
def test_read_file_filtered__bbox(df_nybb, engine):
nybb_filename = geopandas.datasets.get_path("nybb")
@pytest.mark.skipif(not PYOGRIO_GE_090, reason="bug fixed in pyogrio 0.9.0")
def test_read_file_with_hash_in_path(engine, nybb_filename, tmp_path):
folder_with_hash = tmp_path / "path with # present"
folder_with_hash.mkdir(exist_ok=True, parents=True)
read_path = folder_with_hash / "nybb.zip"
shutil.copy(nybb_filename[6:], read_path)
gdf = read_file(read_path, engine=engine)
assert isinstance(gdf, geopandas.GeoDataFrame)
def test_read_file_bbox_tuple(df_nybb, engine, nybb_filename):
bbox = (
1031051.7879884212,
224272.49231459625,
@@ -703,8 +749,7 @@ def test_read_file_filtered__bbox(df_nybb, engine):
assert_geodataframe_equal(filtered_df, expected.reset_index(drop=True))
def test_read_file_filtered__bbox__polygon(df_nybb, engine):
nybb_filename = geopandas.datasets.get_path("nybb")
def test_read_file_bbox_polygon(df_nybb, engine, nybb_filename):
bbox = box(
1031051.7879884212, 224272.49231459625, 1047224.3104931959, 244317.30894023244
)
@@ -713,14 +758,12 @@ def test_read_file_filtered__bbox__polygon(df_nybb, engine):
assert_geodataframe_equal(filtered_df, expected.reset_index(drop=True))
def test_read_file_filtered__rows(df_nybb, engine):
nybb_filename = geopandas.datasets.get_path("nybb")
def test_read_file_filtered__rows(df_nybb, engine, nybb_filename):
filtered_df = read_file(nybb_filename, rows=1, engine=engine)
assert_geodataframe_equal(filtered_df, df_nybb.iloc[[0], :])
def test_read_file_filtered__rows_slice(df_nybb, engine):
nybb_filename = geopandas.datasets.get_path("nybb")
def test_read_file_filtered__rows_slice(df_nybb, engine, nybb_filename):
filtered_df = read_file(nybb_filename, rows=slice(1, 3), engine=engine)
assert_geodataframe_equal(filtered_df, df_nybb.iloc[1:3, :].reset_index(drop=True))
@@ -728,21 +771,14 @@ def test_read_file_filtered__rows_slice(df_nybb, engine):
@pytest.mark.filterwarnings(
"ignore:Layer does not support OLC_FASTFEATURECOUNT:RuntimeWarning"
) # for the slice with -1
def test_read_file_filtered__rows_bbox(df_nybb, engine):
nybb_filename = geopandas.datasets.get_path("nybb")
def test_read_file_filtered__rows_bbox(df_nybb, engine, nybb_filename):
bbox = (
1031051.7879884212,
224272.49231459625,
1047224.3104931959,
244317.30894023244,
)
if engine == "pyogrio" and not PYOGRIO_GE_07:
with pytest.raises(ValueError, match="'skip_features' must be between 0 and 1"):
# combination bbox and rows (rows slice applied after bbox filtering!)
filtered_df = read_file(
nybb_filename, bbox=bbox, rows=slice(4, None), engine=engine
)
else: # fiona
if engine == "fiona":
# combination bbox and rows (rows slice applied after bbox filtering!)
filtered_df = read_file(
nybb_filename, bbox=bbox, rows=slice(4, None), engine=engine
@@ -768,16 +804,14 @@ def test_read_file_filtered__rows_bbox(df_nybb, engine):
)
def test_read_file_filtered_rows_invalid(engine):
def test_read_file_filtered_rows_invalid(engine, nybb_filename):
with pytest.raises(TypeError):
read_file(
geopandas.datasets.get_path("nybb"), rows="not_a_slice", engine=engine
)
read_file(nybb_filename, rows="not_a_slice", engine=engine)
def test_read_file__ignore_geometry(engine):
def test_read_file__ignore_geometry(engine, naturalearth_lowres):
pdf = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres"),
naturalearth_lowres,
ignore_geometry=True,
engine=engine,
)
@@ -785,20 +819,73 @@ def test_read_file__ignore_geometry(engine):
assert isinstance(pdf, pd.DataFrame) and not isinstance(pdf, geopandas.GeoDataFrame)
def test_read_file__ignore_all_fields(engine):
skip_pyogrio_not_supported(engine) # pyogrio has "columns" keyword instead
@pytest.mark.filterwarnings(
"ignore:The 'include_fields' and 'ignore_fields' keywords:DeprecationWarning"
)
def test_read_file__ignore_fields(engine, naturalearth_lowres):
gdf = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres"),
naturalearth_lowres,
ignore_fields=["pop_est", "continent", "iso_a3", "gdp_md_est"],
engine=engine,
)
assert gdf.columns.tolist() == ["name", "geometry"]
@pytest.mark.filterwarnings(
"ignore:The 'include_fields' and 'ignore_fields' keywords:DeprecationWarning"
)
def test_read_file__ignore_all_fields(engine, naturalearth_lowres):
gdf = geopandas.read_file(
naturalearth_lowres,
ignore_fields=["pop_est", "continent", "name", "iso_a3", "gdp_md_est"],
engine="fiona",
engine=engine,
)
assert gdf.columns.tolist() == ["geometry"]
def test_read_file__where_filter(engine):
def test_read_file_missing_geometry(tmpdir, engine):
filename = str(tmpdir / "test.csv")
expected = pd.DataFrame(
{"col1": np.array([1, 2, 3], dtype="int64"), "col2": ["a", "b", "c"]}
)
expected.to_csv(filename, index=False)
df = geopandas.read_file(filename, engine=engine)
# both engines read integers as strings; force back to original type
df["col1"] = df["col1"].astype("int64")
assert isinstance(df, pd.DataFrame)
assert not isinstance(df, geopandas.GeoDataFrame)
assert_frame_equal(df, expected)
def test_read_file_None_attribute(tmp_path, engine):
# Test added in context of https://github.com/geopandas/geopandas/issues/2901
test_path = tmp_path / "test.gpkg"
gdf = GeoDataFrame(
{"a": [None, None]}, geometry=[Point(1, 2), Point(3, 4)], crs=4326
)
gdf.to_file(test_path, engine=engine)
read_gdf = read_file(test_path, engine=engine)
assert_geodataframe_equal(gdf, read_gdf)
def test_read_csv_dtype(tmpdir, df_nybb):
filename = str(tmpdir / "test.csv")
df_nybb.to_csv(filename, index=False)
pdf = pd.read_csv(filename, dtype={"geometry": "geometry"})
assert pdf.geometry.dtype == "geometry"
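A hedged aside (not part of the diff): the test above works because geopandas registers "geometry" as a pandas extension dtype, so WKT strings can be parsed into geometries, for example via GeoSeries.from_wkt:

import geopandas

s = geopandas.GeoSeries.from_wkt(["POINT (1 2)", "POINT (3 4)"])
print(s.dtype)  # geometry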
def test_read_file__where_filter(engine, naturalearth_lowres):
if FIONA_GE_19 or engine == "pyogrio":
gdf = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres"),
naturalearth_lowres,
where="continent='Africa'",
engine=engine,
)
@@ -806,26 +893,75 @@ def test_read_file__where_filter(engine):
else:
with pytest.raises(NotImplementedError):
geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres"),
naturalearth_lowres,
where="continent='Africa'",
engine="fiona",
)
@PYOGRIO_MARK
def test_read_file__columns():
# TODO: this is only supported for pyogrio, but we could mimic it for fiona as well
def test_read_file__columns(engine, naturalearth_lowres):
if engine == "fiona" and not FIONA_GE_19:
pytest.skip("columns requires fiona 1.9+")
gdf = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres"),
columns=["name", "pop_est"],
engine="pyogrio",
naturalearth_lowres, columns=["name", "pop_est"], engine=engine
)
assert gdf.columns.tolist() == ["name", "pop_est", "geometry"]
def test_read_file_filtered_with_gdf_boundary(df_nybb, engine):
def test_read_file__columns_empty(engine, naturalearth_lowres):
if engine == "fiona" and not FIONA_GE_19:
pytest.skip("columns requires fiona 1.9+")
gdf = geopandas.read_file(naturalearth_lowres, columns=[], engine=engine)
assert gdf.columns.tolist() == ["geometry"]
@pytest.mark.skipif(FIONA_GE_19 or not fiona, reason="test for fiona < 1.9")
def test_read_file__columns_old_fiona(naturalearth_lowres):
with pytest.raises(NotImplementedError):
geopandas.read_file(
naturalearth_lowres, columns=["name", "pop_est"], engine="fiona"
)
@pytest.mark.filterwarnings(
"ignore:The 'include_fields' and 'ignore_fields' keywords:DeprecationWarning"
)
def test_read_file__include_fields(engine, naturalearth_lowres):
if engine == "fiona" and not FIONA_GE_19:
pytest.skip("columns requires fiona 1.9+")
gdf = geopandas.read_file(
naturalearth_lowres, include_fields=["name", "pop_est"], engine=engine
)
assert gdf.columns.tolist() == ["name", "pop_est", "geometry"]
@pytest.mark.skipif(not FIONA_GE_19, reason="columns requires fiona 1.9+")
def test_read_file__columns_conflicting_keywords(engine, naturalearth_lowres):
path = naturalearth_lowres
with pytest.raises(ValueError, match="Cannot specify both"):
geopandas.read_file(
path, include_fields=["name"], ignore_fields=["pop_est"], engine=engine
)
with pytest.raises(ValueError, match="Cannot specify both"):
geopandas.read_file(
path, columns=["name"], include_fields=["pop_est"], engine=engine
)
with pytest.raises(ValueError, match="Cannot specify both"):
geopandas.read_file(
path, columns=["name"], ignore_fields=["pop_est"], engine=engine
)
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
@pytest.mark.parametrize("file_like", [False, True])
def test_read_file_bbox_gdf(df_nybb, engine, nybb_filename, file_like):
full_df_shape = df_nybb.shape
nybb_filename = geopandas.datasets.get_path("nybb")
bbox = geopandas.GeoDataFrame(
geometry=[
box(
@@ -837,28 +973,41 @@ def test_read_file_filtered_with_gdf_boundary(df_nybb, engine):
],
crs=NYBB_CRS,
)
filtered_df = read_file(nybb_filename, bbox=bbox, engine=engine)
infile = (
open(nybb_filename.replace("zip://", ""), "rb") if file_like else nybb_filename
)
filtered_df = read_file(infile, bbox=bbox, engine=engine)
filtered_df_shape = filtered_df.shape
assert full_df_shape != filtered_df_shape
assert filtered_df_shape == (2, 5)
def test_read_file_filtered_with_gdf_boundary__mask(df_nybb, engine):
skip_pyogrio_not_supported(engine)
gdf_mask = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres"))
gdf = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_cities"),
mask=gdf_mask[gdf_mask.continent == "Africa"],
engine=engine,
)
filtered_df_shape = gdf.shape
assert filtered_df_shape == (57, 2)
def test_read_file_filtered_with_gdf_boundary__mask__polygon(df_nybb, engine):
skip_pyogrio_not_supported(engine)
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
@pytest.mark.parametrize("file_like", [False, True])
def test_read_file_mask_gdf(df_nybb, engine, nybb_filename, file_like):
full_df_shape = df_nybb.shape
mask = geopandas.GeoDataFrame(
geometry=[
box(
1031051.7879884212,
224272.49231459625,
1047224.3104931959,
244317.30894023244,
)
],
crs=NYBB_CRS,
)
infile = (
open(nybb_filename.replace("zip://", ""), "rb") if file_like else nybb_filename
)
filtered_df = read_file(infile, mask=mask, engine=engine)
filtered_df_shape = filtered_df.shape
assert full_df_shape != filtered_df_shape
assert filtered_df_shape == (2, 5)
def test_read_file_mask_polygon(df_nybb, engine, nybb_filename):
full_df_shape = df_nybb.shape
nybb_filename = geopandas.datasets.get_path("nybb")
mask = box(
1031051.7879884212, 224272.49231459625, 1047224.3104931959, 244317.30894023244
)
@@ -868,10 +1017,25 @@ def test_read_file_filtered_with_gdf_boundary__mask__polygon(df_nybb, engine):
assert filtered_df_shape == (2, 5)
def test_read_file_filtered_with_gdf_boundary_mismatched_crs(df_nybb, engine):
skip_pyogrio_not_supported(engine)
def test_read_file_mask_geojson(df_nybb, nybb_filename, engine):
full_df_shape = df_nybb.shape
mask = mapping(
box(
1031051.7879884212,
224272.49231459625,
1047224.3104931959,
244317.30894023244,
)
)
filtered_df = read_file(nybb_filename, mask=mask, engine=engine)
filtered_df_shape = filtered_df.shape
assert full_df_shape != filtered_df_shape
assert filtered_df_shape == (2, 5)
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
def test_read_file_bbox_gdf_mismatched_crs(df_nybb, engine, nybb_filename):
full_df_shape = df_nybb.shape
nybb_filename = geopandas.datasets.get_path("nybb")
bbox = geopandas.GeoDataFrame(
geometry=[
box(
@@ -890,10 +1054,9 @@ def test_read_file_filtered_with_gdf_boundary_mismatched_crs(df_nybb, engine):
assert filtered_df_shape == (2, 5)
def test_read_file_filtered_with_gdf_boundary_mismatched_crs__mask(df_nybb, engine):
skip_pyogrio_not_supported(engine)
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
def test_read_file_mask_gdf_mismatched_crs(df_nybb, engine, nybb_filename):
full_df_shape = df_nybb.shape
nybb_filename = geopandas.datasets.get_path("nybb")
mask = geopandas.GeoDataFrame(
geometry=[
box(
@@ -912,6 +1075,20 @@ def test_read_file_filtered_with_gdf_boundary_mismatched_crs__mask(df_nybb, engi
assert filtered_df_shape == (2, 5)
def test_read_file_bbox_mask_not_allowed(engine, nybb_filename):
bbox = (
1031051.7879884212,
224272.49231459625,
1047224.3104931959,
244317.30894023244,
)
mask = box(*bbox)
with pytest.raises(ValueError, match="mask and bbox can not be set together"):
read_file(nybb_filename, bbox=bbox, mask=mask)
@pytest.mark.filterwarnings(
"ignore:Layer 'b'test_empty'' does not have any features:UserWarning"
)
@@ -942,11 +1119,6 @@ def test_read_file_empty_shapefile(tmpdir, engine):
assert all(empty.columns == ["A", "Z", "geometry"])
def test_read_file_privacy(tmpdir, df_nybb):
with pytest.warns(FutureWarning):
geopandas.io.file.read_file(geopandas.datasets.get_path("nybb"))
class FileNumber(object):
def __init__(self, tmpdir, base, ext):
self.tmpdir = str(tmpdir)
@@ -1113,7 +1285,7 @@ def test_write_index_to_file(tmpdir, df_points, driver, ext, engine):
# index as string
df_p = df_points.copy()
df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry)
df.index = pd.TimedeltaIndex(range(len(df)), "days")
df.index = pd.to_timedelta(range(len(df)), unit="days")
# TODO: TimedeltaIndex is an invalid field type
df.index = df.index.astype(str)
do_checks(df, index_is_used=True)
@@ -1121,7 +1293,7 @@ def test_write_index_to_file(tmpdir, df_points, driver, ext, engine):
# unnamed DatetimeIndex
df_p = df_points.copy()
df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry)
df.index = pd.TimedeltaIndex(range(len(df)), "days") + pd.DatetimeIndex(
df.index = pd.to_timedelta(range(len(df)), unit="days") + pd.to_datetime(
["1999-12-27"] * len(df)
)
if driver == "ESRI Shapefile":
@@ -1152,6 +1324,54 @@ def test_write_read_file(test_file, engine):
os.remove(os.path.expanduser(test_file))
@pytest.mark.skipif(fiona is False, reason="Fiona not available")
@pytest.mark.skipif(FIONA_GE_19, reason="Fiona >= 1.9 supports metadata")
def test_to_file_metadata_unsupported_fiona_version(tmp_path, df_points):
metadata = {"title": "test"}
tmp_file = tmp_path / "test.gpkg"
match = "'metadata' keyword is only supported for Fiona >= 1.9"
with pytest.raises(NotImplementedError, match=match):
df_points.to_file(tmp_file, driver="GPKG", engine="fiona", metadata=metadata)
@pytest.mark.skipif(not FIONA_GE_19, reason="only Fiona >= 1.9 supports metadata")
def test_to_file_metadata_supported_fiona_version(tmp_path, df_points):
metadata = {"title": "test"}
tmp_file = tmp_path / "test.gpkg"
df_points.to_file(tmp_file, driver="GPKG", engine="fiona", metadata=metadata)
# Check that metadata is written to the file
with fiona.open(tmp_file) as src:
tags = src.tags()
assert tags == metadata
@pytest.mark.skipif(pyogrio is False, reason="Pyogrio not available")
def test_to_file_metadata_pyogrio(tmp_path, df_points):
metadata = {"title": "test"}
tmp_file = tmp_path / "test.gpkg"
df_points.to_file(tmp_file, driver="GPKG", engine="pyogrio", metadata=metadata)
# Check that metadata is written to the file
info = pyogrio.read_info(tmp_file)
layer_metadata = info["layer_metadata"]
assert layer_metadata == metadata
@pytest.mark.parametrize(
"driver, ext", [("ESRI Shapefile", ".shp"), ("GeoJSON", ".geojson")]
)
def test_to_file_metadata_unsupported_driver(driver, ext, tmpdir, df_points, engine):
metadata = {"title": "Test"}
tempfilename = os.path.join(str(tmpdir), "test" + ext)
with pytest.raises(
NotImplementedError, match="'metadata' keyword is only supported for"
):
df_points.to_file(tempfilename, driver=driver, metadata=metadata)
def test_multiple_geom_cols_error(tmpdir, df_nybb):
df_nybb["geom2"] = df_nybb.geometry
with pytest.raises(ValueError, match="GeoDataFrame contains multiple geometry"):
@@ -1160,7 +1380,7 @@ def test_multiple_geom_cols_error(tmpdir, df_nybb):
@PYOGRIO_MARK
@FIONA_MARK
def test_option_io_engine():
def test_option_io_engine(nybb_filename):
try:
geopandas.options.io_engine = "pyogrio"
@@ -1171,8 +1391,48 @@ def test_option_io_engine():
orig = fiona.supported_drivers["ESRI Shapefile"]
fiona.supported_drivers["ESRI Shapefile"] = "w"
nybb_filename = geopandas.datasets.get_path("nybb")
_ = geopandas.read_file(nybb_filename)
finally:
fiona.supported_drivers["ESRI Shapefile"] = orig
geopandas.options.io_engine = None
@pytest.mark.skipif(pyogrio, reason="test for pyogrio not installed")
def test_error_engine_unavailable_pyogrio(tmp_path, df_points, file_path):
with pytest.raises(ImportError, match="the 'read_file' function requires"):
geopandas.read_file(file_path, engine="pyogrio")
with pytest.raises(ImportError, match="the 'to_file' method requires"):
df_points.to_file(tmp_path / "test.gpkg", engine="pyogrio")
@pytest.mark.skipif(fiona, reason="test for fiona not installed")
def test_error_engine_unavailable_fiona(tmp_path, df_points, file_path):
with pytest.raises(ImportError, match="the 'read_file' function requires"):
geopandas.read_file(file_path, engine="fiona")
with pytest.raises(ImportError, match="the 'to_file' method requires"):
df_points.to_file(tmp_path / "test.gpkg", engine="fiona")
@PYOGRIO_MARK
def test_list_layers(df_points, tmpdir):
tempfilename = os.path.join(str(tmpdir), "dataset.gpkg")
df_points.to_file(tempfilename, layer="original")
df_points.set_geometry(df_points.buffer(1)).to_file(tempfilename, layer="buffered")
df_points.set_geometry(df_points.buffer(2).boundary).to_file(
tempfilename, layer="boundary"
)
pyogrio.write_dataframe(
df_points[["value1", "value2"]], tempfilename, layer="non-spatial"
)
layers = geopandas.list_layers(tempfilename)
expected = pd.DataFrame(
{
"name": ["original", "buffered", "boundary", "non-spatial"],
"geometry_type": ["Point", "Polygon", "LineString", None],
}
)
assert_frame_equal(layers, expected)

View File

@@ -12,11 +12,10 @@ from shapely.geometry import (
import geopandas
from geopandas import GeoDataFrame
from geopandas.testing import assert_geodataframe_equal
import pytest
from .test_file import FIONA_MARK, PYOGRIO_MARK
import pytest
from geopandas.testing import assert_geodataframe_equal
# Credit: Polygons below come from Montreal city Open Data portal
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
@@ -244,7 +243,14 @@ def geodataframe(request):
return request.param
@pytest.fixture(params=["GeoJSON", "ESRI Shapefile", "GPKG", "SQLite"])
@pytest.fixture(
params=[
("GeoJSON", ".geojson"),
("ESRI Shapefile", ".shp"),
("GPKG", ".gpkg"),
("SQLite", ".sqlite"),
]
)
def ogr_driver(request):
return request.param
@@ -260,16 +266,18 @@ def engine(request):
def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver, engine):
output_file = os.path.join(str(tmpdir), "output_file")
driver, ext = ogr_driver
output_file = os.path.join(str(tmpdir), "output_file" + ext)
write_kwargs = {}
if ogr_driver == "SQLite":
if driver == "SQLite":
write_kwargs["spatialite"] = True
# This if statement can be removed once the minimal fiona version is >= 1.8.20
if engine == "fiona":
import fiona
from packaging.version import Version
import fiona
if Version(fiona.__version__) < Version("1.8.20"):
pytest.skip("SQLite driver only available from version 1.8.20")
@@ -285,22 +293,35 @@ def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver, engine):
):
write_kwargs["geometry_type"] = "Point Z"
expected_error = _expected_error_on(geodataframe, ogr_driver)
expected_error = _expected_error_on(geodataframe, driver)
if expected_error:
with pytest.raises(
RuntimeError, match="Failed to write record|Could not add feature to layer"
):
geodataframe.to_file(
output_file, driver=ogr_driver, engine=engine, **write_kwargs
output_file, driver=driver, engine=engine, **write_kwargs
)
else:
geodataframe.to_file(
output_file, driver=ogr_driver, engine=engine, **write_kwargs
)
if driver == "SQLite" and engine == "pyogrio":
try:
geodataframe.to_file(
output_file, driver=driver, engine=engine, **write_kwargs
)
except ValueError as e:
if "unrecognized option 'SPATIALITE'" in str(e):
pytest.xfail(
"pyogrio wheels from PyPI do not come with SpatiaLite support. "
f"Error: {e}"
)
raise
else:
geodataframe.to_file(
output_file, driver=driver, engine=engine, **write_kwargs
)
reloaded = geopandas.read_file(output_file, engine=engine)
if ogr_driver == "GeoJSON" and engine == "pyogrio":
if driver == "GeoJSON" and engine == "pyogrio":
# For GeoJSON files, the int64 column comes back as int32
reloaded["a"] = reloaded["a"].astype("int64")

View File

@@ -1,5 +1,8 @@
from collections import OrderedDict
import numpy as np
import pandas as pd
from shapely.geometry import (
LineString,
MultiLineString,
@@ -9,12 +12,11 @@ from shapely.geometry import (
Polygon,
)
import pandas as pd
import pytest
import numpy as np
from geopandas import GeoDataFrame
from geopandas.io.file import infer_schema
import pytest
# Credit: Polygons below come from Montreal city Open Data portal
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
city_hall_boundaries = Polygon(

View File

@@ -2,7 +2,7 @@
See generate_legacy_storage_files.py for the creation of the legacy files.
"""
from contextlib import contextmanager
import glob
import os
import pathlib
@@ -11,9 +11,6 @@ import pandas as pd
import pytest
from geopandas.testing import assert_geodataframe_equal
from geopandas import _compat as compat
import geopandas
from shapely.geometry import Point
DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"
@@ -34,18 +31,7 @@ def legacy_pickle(request):
return request.param
@contextmanager
def with_use_pygeos(option):
orig = geopandas.options.use_pygeos
geopandas.options.use_pygeos = option
try:
yield
finally:
geopandas.options.use_pygeos = orig
@pytest.mark.skipif(
compat.USE_SHAPELY_20 or compat.USE_PYGEOS,
@pytest.mark.skip(
reason=(
"shapely 2.0/pygeos-based unpickling currently only works for "
"shapely-2.0/pygeos-written files"
@@ -68,43 +54,3 @@ def test_round_trip_current(tmpdir, current_pickle_data):
result = pd.read_pickle(path)
assert_geodataframe_equal(result, value)
assert isinstance(result.has_sindex, bool)
def _create_gdf():
return geopandas.GeoDataFrame(
{"a": [0.1, 0.2, 0.3], "geometry": [Point(1, 1), Point(2, 2), Point(3, 3)]},
crs="EPSG:4326",
)
@pytest.mark.skipif(not compat.HAS_PYGEOS, reason="requires pygeos to test #1745")
def test_pygeos_switch(tmpdir):
# writing and reading with pygeos disabled
with with_use_pygeos(False):
gdf = _create_gdf()
path = str(tmpdir / "gdf_crs1.pickle")
gdf.to_pickle(path)
result = pd.read_pickle(path)
assert_geodataframe_equal(result, gdf)
# writing without pygeos, reading with pygeos
with with_use_pygeos(False):
gdf = _create_gdf()
path = str(tmpdir / "gdf_crs1.pickle")
gdf.to_pickle(path)
with with_use_pygeos(True):
result = pd.read_pickle(path)
gdf = _create_gdf()
assert_geodataframe_equal(result, gdf)
# writing with pygeos, reading without pygeos
with with_use_pygeos(True):
gdf = _create_gdf()
path = str(tmpdir / "gdf_crs1.pickle")
gdf.to_pickle(path)
with with_use_pygeos(False):
result = pd.read_pickle(path)
gdf = _create_gdf()
assert_geodataframe_equal(result, gdf)

View File

@@ -4,18 +4,27 @@ The spatial database tests may not work without additional system
configuration. postGIS tests require a test database to have been setup;
see geopandas.tests.util for more information.
"""
import os
import warnings
from importlib.util import find_spec
import pandas as pd
import geopandas
from geopandas import GeoDataFrame, read_file, read_postgis
import geopandas._compat as compat
from geopandas.io.sql import _get_conn as get_conn, _write_postgis as write_postgis
from geopandas.tests.util import create_postgis, create_spatialite, validate_boro_df
from geopandas import GeoDataFrame, read_file, read_postgis
from geopandas._compat import HAS_PYPROJ
from geopandas.io.sql import _get_conn as get_conn
from geopandas.io.sql import _write_postgis as write_postgis
import pytest
from geopandas.tests.util import (
create_postgis,
create_spatialite,
mock,
validate_boro_df,
)
try:
from sqlalchemy import text
@@ -26,31 +35,48 @@ except ImportError:
@pytest.fixture
def df_nybb():
nybb_path = geopandas.datasets.get_path("nybb")
df = read_file(nybb_path)
def df_nybb(nybb_filename):
df = read_file(nybb_filename)
return df
@pytest.fixture()
def connection_postgis():
def check_available_postgis_drivers() -> list[str]:
"""Work out which of psycopg2 and psycopg are available.
This prevents tests from running if the relevant package isn't installed
(rather than being skipped, as skips are treated as failures during postgis CI).
"""
Initiates a connection to a postGIS database that must already exist.
See create_postgis for more information.
"""
psycopg2 = pytest.importorskip("psycopg2")
from psycopg2 import OperationalError
drivers = []
if find_spec("psycopg"):
drivers.append("psycopg")
if find_spec("psycopg2"):
drivers.append("psycopg2")
return drivers
POSTGIS_DRIVERS = check_available_postgis_drivers()
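A simplified sketch (not part of the diff; names are illustrative) of the indirect-parametrization pattern used throughout this file: pytest hands each entry of the parametrize list to the fixture through request.param before the fixture body runs:

import pytest

@pytest.fixture
def connection(request):
    # request.param is one entry of the parametrize list below
    return f"connected-with-{request.param}"

@pytest.mark.parametrize("connection", ["psycopg", "psycopg2"], indirect=True)
def test_connection(connection):
    assert connection.startswith("connected-with-")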
def prepare_database_credentials() -> dict:
"""Gather postgres connection credentials from environment variables."""
return {
"dbname": "test_geopandas",
"user": os.environ.get("PGUSER"),
"password": os.environ.get("PGPASSWORD"),
"host": os.environ.get("PGHOST"),
"port": os.environ.get("PGPORT"),
}
@pytest.fixture()
def connection_postgis(request):
"""Create a postgres connection using either psycopg2 or psycopg.
Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS."""
psycopg = pytest.importorskip(request.param)
dbname = "test_geopandas"
user = os.environ.get("PGUSER")
password = os.environ.get("PGPASSWORD")
host = os.environ.get("PGHOST")
port = os.environ.get("PGPORT")
try:
con = psycopg2.connect(
dbname=dbname, user=user, password=password, host=host, port=port
)
except OperationalError:
con = psycopg.connect(**prepare_database_credentials())
except psycopg.OperationalError:
pytest.skip("Cannot connect with postgresql database")
with warnings.catch_warnings():
warnings.filterwarnings(
@@ -61,28 +87,25 @@ def connection_postgis():
@pytest.fixture()
def engine_postgis():
def engine_postgis(request):
"""
Initiates a connection engine to a postGIS database that must already exist.
Initiate a sqlalchemy connection engine using either psycopg2 or psycopg.
Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS.
"""
sqlalchemy = pytest.importorskip("sqlalchemy")
from sqlalchemy.engine.url import URL
user = os.environ.get("PGUSER")
password = os.environ.get("PGPASSWORD")
host = os.environ.get("PGHOST")
port = os.environ.get("PGPORT")
dbname = "test_geopandas"
credentials = prepare_database_credentials()
try:
con = sqlalchemy.create_engine(
URL.create(
drivername="postgresql+psycopg2",
username=user,
database=dbname,
password=password,
host=host,
port=port,
drivername=f"postgresql+{request.param}",
username=credentials["user"],
database=credentials["dbname"],
password=credentials["password"],
host=credentials["host"],
port=credentials["port"],
)
)
con.connect()
@@ -140,7 +163,7 @@ def drop_table_if_exists(conn_or_engine, table):
@pytest.fixture
def df_mixed_single_and_multi():
from shapely.geometry import Point, LineString, MultiLineString
from shapely.geometry import LineString, MultiLineString, Point
df = geopandas.GeoDataFrame(
{
@@ -157,7 +180,7 @@ def df_mixed_single_and_multi():
@pytest.fixture
def df_geom_collection():
from shapely.geometry import Point, LineString, Polygon, GeometryCollection
from shapely.geometry import GeometryCollection, LineString, Point, Polygon
df = geopandas.GeoDataFrame(
{
@@ -188,7 +211,7 @@ def df_linear_ring():
@pytest.fixture
def df_3D_geoms():
from shapely.geometry import Point, LineString, Polygon
from shapely.geometry import LineString, Point, Polygon
df = geopandas.GeoDataFrame(
{
@@ -204,6 +227,7 @@ def df_3D_geoms():
class TestIO:
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_get_conn(self, engine_postgis):
Connection = pytest.importorskip("sqlalchemy.engine.base").Connection
@@ -217,6 +241,7 @@ class TestIO:
with get_conn(object()):
pass
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_default(self, connection_postgis, df_nybb):
con = connection_postgis
create_postgis(con, df_nybb)
@@ -229,6 +254,7 @@ class TestIO:
# by user; should not be set to 0, as from get_srid failure
assert df.crs is None
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_custom_geom_col(self, connection_postgis, df_nybb):
con = connection_postgis
geom_col = "the_geom"
@@ -239,6 +265,7 @@ class TestIO:
validate_boro_df(df)
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_select_geom_as(self, connection_postgis, df_nybb):
"""Tests that a SELECT {geom} AS {some_other_geom} works."""
con = connection_postgis
@@ -254,6 +281,7 @@ class TestIO:
validate_boro_df(df)
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_get_srid(self, connection_postgis, df_nybb):
"""Tests that an SRID can be read from a geodatabase (GH #451)."""
con = connection_postgis
@@ -267,6 +295,7 @@ class TestIO:
validate_boro_df(df)
assert df.crs == crs
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_override_srid(self, connection_postgis, df_nybb):
"""Tests that a user specified CRS overrides the geodatabase SRID."""
con = connection_postgis
@@ -279,6 +308,7 @@ class TestIO:
validate_boro_df(df)
assert df.crs == orig_crs
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_from_postgis_default(self, connection_postgis, df_nybb):
con = connection_postgis
create_postgis(con, df_nybb)
@@ -288,6 +318,7 @@ class TestIO:
validate_boro_df(df, case_sensitive=False)
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_from_postgis_custom_geom_col(self, connection_postgis, df_nybb):
con = connection_postgis
geom_col = "the_geom"
@@ -323,6 +354,7 @@ class TestIO:
df = read_postgis(sql, con, geom_col=geom_col)
validate_boro_df(df)
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_chunksize(self, connection_postgis, df_nybb):
"""Test chunksize argument"""
chunksize = 2
@@ -337,14 +369,7 @@ class TestIO:
# by user; should not be set to 0, as from get_srid failure
assert df.crs is None
def test_read_postgis_privacy(self, connection_postgis, df_nybb):
con = connection_postgis
create_postgis(con, df_nybb)
sql = "SELECT * FROM nybb;"
with pytest.warns(FutureWarning):
geopandas.io.sql.read_postgis(sql, con)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_default(self, engine_postgis, df_nybb):
"""Tests that GeoDataFrame can be written to PostGIS with defaults."""
engine = engine_postgis
@@ -360,6 +385,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_uppercase_tablename(self, engine_postgis, df_nybb):
"""Tests writing GeoDataFrame to PostGIS with uppercase tablename."""
engine = engine_postgis
@@ -375,6 +401,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_sqlalchemy_connection(self, engine_postgis, df_nybb):
"""Tests that GeoDataFrame can be written to PostGIS with defaults."""
with engine_postgis.begin() as con:
@@ -390,6 +417,7 @@ class TestIO:
df = read_postgis(sql, con, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_fail_when_table_exists(self, engine_postgis, df_nybb):
"""
Tests that uploading the same table raises error when: if_replace='fail'.
@@ -409,6 +437,7 @@ class TestIO:
else:
raise e
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_replace_when_table_exists(self, engine_postgis, df_nybb):
"""
Tests that replacing a table is possible when: if_replace='replace'.
@@ -426,6 +455,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_append_when_table_exists(self, engine_postgis, df_nybb):
"""
Tests that appending to existing table produces correct results when:
@@ -445,15 +475,18 @@ class TestIO:
# There should be twice as many rows in the new table
assert new_rows == orig_rows * 2, (
"There should be {target} rows,"
"found: {current}".format(target=orig_rows * 2, current=new_rows),
"There should be {target} rows,found: {current}".format(
target=orig_rows * 2, current=new_rows
),
)
# Number of columns should stay the same
assert new_cols == orig_cols, (
"There should be {target} columns,"
"found: {current}".format(target=orig_cols, current=new_cols),
"There should be {target} columns,found: {current}".format(
target=orig_cols, current=new_cols
),
)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_without_crs(self, engine_postgis, df_nybb):
"""
Tests that GeoDataFrame can be written to PostGIS without CRS information.
@@ -463,8 +496,7 @@ class TestIO:
table = "nybb"
# Write to db
df_nybb = df_nybb
df_nybb.crs = None
df_nybb.geometry.array.crs = None
with pytest.warns(UserWarning, match="Could not parse CRS from the GeoDataF"):
write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
# Validate that srid is -1
@@ -477,6 +509,7 @@ class TestIO:
target_srid = conn.execute(sql).fetchone()[0]
assert target_srid == 0, "SRID should be 0, found %s" % target_srid
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_with_esri_authority(self, engine_postgis, df_nybb):
"""
Tests that GeoDataFrame can be written to PostGIS with ESRI Authority
@@ -499,6 +532,7 @@ class TestIO:
target_srid = conn.execute(sql).fetchone()[0]
assert target_srid == 102003, "SRID should be 102003, found %s" % target_srid
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_geometry_collection(
self, engine_postgis, df_geom_collection
):
@@ -525,6 +559,7 @@ class TestIO:
assert geom_type.upper() == "GEOMETRYCOLLECTION"
assert df.geom_type.unique()[0] == "GeometryCollection"
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_mixed_geometry_types(
self, engine_postgis, df_mixed_single_and_multi
):
@@ -551,6 +586,7 @@ class TestIO:
assert res[1][0].upper() == "MULTILINESTRING"
assert res[2][0].upper() == "POINT"
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_linear_ring(self, engine_postgis, df_linear_ring):
"""
Tests that writing a LinearRing works.
@@ -572,6 +608,7 @@ class TestIO:
assert geom_type.upper() == "LINESTRING"
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_in_chunks(self, engine_postgis, df_mixed_single_and_multi):
"""
Tests that writing in chunks works.
@@ -605,6 +642,7 @@ class TestIO:
assert res[1][0].upper() == "MULTILINESTRING"
assert res[2][0].upper() == "POINT"
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_to_different_schema(self, engine_postgis, df_nybb):
"""
Tests writing data to alternative schema.
@@ -628,6 +666,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_to_different_schema_when_table_exists(
self, engine_postgis, df_nybb
):
@@ -672,6 +711,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_3D_geometries(self, engine_postgis, df_3D_geoms):
"""
Tests writing a geometries with 3 dimensions works.
@@ -687,6 +727,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
assert list(df.geometry.has_z) == [True, True, True]
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_row_order(self, engine_postgis, df_nybb):
"""
Tests that the row order in db table follows the order of the original frame.
@@ -703,6 +744,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
assert df["BoroCode"].tolist() == correct_order
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_append_before_table_exists(self, engine_postgis, df_nybb):
"""
Tests that insert works with if_exists='append' when table does not exist yet.
@@ -720,6 +762,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_append_with_different_crs(self, engine_postgis, df_nybb):
"""
Tests that the warning is raised if table CRS differs from frame.
@@ -736,9 +779,26 @@ class TestIO:
with pytest.raises(ValueError, match="CRS of the target table"):
write_postgis(df_nybb2, con=engine, name=table, if_exists="append")
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_append_without_crs(self, engine_postgis, df_nybb):
# This test was included in #3328 when the default value for no
# CRS was changed from an SRID of -1 to 0. This resolves issues
# appending dataframes without a CRS to postgis, as postgis uses
# 0 as its no-CRS value.
engine = engine_postgis
df_nybb = df_nybb.set_crs(None, allow_override=True)
table = "nybb"
write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
# append another dataframe with no crs
df_nybb2 = df_nybb
write_postgis(df_nybb2, con=engine, name=table, if_exists="append")
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
@pytest.mark.xfail(
compat.PANDAS_GE_20 and not compat.PANDAS_GE_21,
reason="Duplicate columns are dropped in read_sql with pandas 2.0.x",
compat.PANDAS_GE_20 and not compat.PANDAS_GE_202,
reason="Duplicate columns are dropped in read_sql with pandas 2.0.0 and 2.0.1",
)
def test_duplicate_geometry_column_fails(self, engine_postgis):
"""
@@ -750,3 +810,69 @@ class TestIO:
with pytest.raises(ValueError):
read_postgis(sql, engine, geom_col="geom")
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_non_epsg_crs(self, connection_postgis, df_nybb):
con = connection_postgis
df_nybb = df_nybb.to_crs(crs="esri:54052")
create_postgis(con, df_nybb, srid=54052)
sql = "SELECT * FROM nybb;"
df = read_postgis(sql, con)
validate_boro_df(df)
assert df.crs == "ESRI:54052"
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
@mock.patch("shapely.get_srid")
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_srid_not_in_table(self, mock_get_srid, connection_postgis, df_nybb):
# mock a non-existent srid for edge case if shapely has an srid
# not present in postgis table.
pyproj = pytest.importorskip("pyproj")
mock_get_srid.return_value = 99999
con = connection_postgis
df_nybb = df_nybb.to_crs(crs="epsg:4326")
create_postgis(con, df_nybb)
sql = "SELECT * FROM nybb;"
with pytest.raises(pyproj.exceptions.CRSError, match="crs not found"):
with pytest.warns(UserWarning, match="Could not find srid 99999"):
read_postgis(sql, con)
@mock.patch("geopandas.io.sql._get_spatial_ref_sys_df")
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_no_spatial_ref_sys_table_in_postgis(
self, mock_get_spatial_ref_sys_df, connection_postgis, df_nybb
):
# mock for a non-existent spatial_ref_sys database
mock_get_spatial_ref_sys_df.side_effect = pd.errors.DatabaseError
con = connection_postgis
df_nybb = df_nybb.to_crs(crs="epsg:4326")
create_postgis(con, df_nybb, srid=4326)
sql = "SELECT * FROM nybb;"
with pytest.warns(
UserWarning, match="Could not find the spatial reference system table"
):
df = read_postgis(sql, con)
assert df.crs == "EPSG:4326"
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_non_epsg_crs_chunksize(self, connection_postgis, df_nybb):
"""Test chunksize argument with non epsg crs"""
chunksize = 2
con = connection_postgis
df_nybb = df_nybb.to_crs(crs="esri:54052")
create_postgis(con, df_nybb, srid=54052)
sql = "SELECT * FROM nybb;"
df = pd.concat(read_postgis(sql, con, chunksize=chunksize))
validate_boro_df(df)
assert df.crs == "ESRI:54052"