This commit is contained in:
2025-01-26 19:24:23 -08:00
parent 32cd60e92b
commit d1dde0dbc6
4155 changed files with 29170 additions and 216373 deletions

View File

@@ -19,6 +19,7 @@ pickles and test versus the current data that is generated
(with master). These are then compared.
"""
import os
import pickle
import platform
@@ -26,9 +27,10 @@ import sys
import pandas as pd
import geopandas
from shapely.geometry import Point
import geopandas
def create_pickle_data():
"""create the pickle data"""

View File

@@ -1,33 +1,41 @@
import datetime
import io
import json
import os
import pathlib
import shutil
import tempfile
from collections import OrderedDict
from packaging.version import Version
import numpy as np
import pandas as pd
import pytest
import pytz
from packaging.version import Version
from pandas.api.types import is_datetime64_any_dtype
from pandas.testing import assert_series_equal
from shapely.geometry import Point, Polygon, box
from shapely.geometry import Point, Polygon, box, mapping
import geopandas
from geopandas import GeoDataFrame, read_file
from geopandas._compat import PANDAS_GE_20
from geopandas.io.file import _detect_driver, _EXTENSION_TO_DRIVER
from geopandas._compat import HAS_PYPROJ, PANDAS_GE_20, PANDAS_GE_30
from geopandas.io.file import _EXTENSION_TO_DRIVER, _detect_driver
import pytest
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
from geopandas.tests.util import PACKAGE_DIR, validate_boro_df
from pandas.testing import assert_frame_equal, assert_series_equal
try:
import pyogrio
PYOGRIO_GE_07 = Version(pyogrio.__version__) > Version("0.6.0")
# those version checks have to be defined here instead of imported from
# geopandas.io.file (those are only initialized lazily on first usage)
PYOGRIO_GE_090 = Version(Version(pyogrio.__version__).base_version) >= Version(
"0.9.0"
)
except ImportError:
pyogrio = False
PYOGRIO_GE_07 = False
PYOGRIO_GE_090 = False
try:
@@ -46,6 +54,9 @@ FIONA_MARK = pytest.mark.skipif(not fiona, reason="fiona not installed")
_CRS = "epsg:4326"
pytestmark = pytest.mark.filterwarnings("ignore:Value:RuntimeWarning:pyogrio")
@pytest.fixture(
params=[
pytest.param("fiona", marks=FIONA_MARK),
@@ -62,9 +73,8 @@ def skip_pyogrio_not_supported(engine):
@pytest.fixture
def df_nybb(engine):
nybb_path = geopandas.datasets.get_path("nybb")
df = read_file(nybb_path, engine=engine)
def df_nybb(engine, nybb_filename):
df = read_file(nybb_filename, engine=engine)
return df
@@ -130,7 +140,7 @@ def test_to_file(tmpdir, df_nybb, df_null, driver, ext, engine):
df = GeoDataFrame.from_file(tempfilename, engine=engine)
assert "geometry" in df
assert len(df) == 5
assert np.alltrue(df["BoroName"].values == df_nybb["BoroName"])
assert np.all(df["BoroName"].values == df_nybb["BoroName"])
# Write layer with null geometry out to file
tempfilename = os.path.join(str(tmpdir), "null_geom" + ext)
@@ -139,7 +149,7 @@ def test_to_file(tmpdir, df_nybb, df_null, driver, ext, engine):
df = GeoDataFrame.from_file(tempfilename, engine=engine)
assert "geometry" in df
assert len(df) == 2
assert np.alltrue(df["Name"].values == df_null["Name"])
assert np.all(df["Name"].values == df_null["Name"])
# check the expected driver
assert_correct_driver(tempfilename, ext, engine)
@@ -153,7 +163,7 @@ def test_to_file_pathlib(tmpdir, df_nybb, driver, ext, engine):
df = GeoDataFrame.from_file(temppath, engine=engine)
assert "geometry" in df
assert len(df) == 5
assert np.alltrue(df["BoroName"].values == df_nybb["BoroName"])
assert np.all(df["BoroName"].values == df_nybb["BoroName"])
# check the expected driver
assert_correct_driver(temppath, ext, engine)
@@ -174,9 +184,10 @@ def test_to_file_bool(tmpdir, driver, ext, engine):
result = read_file(tempfilename, engine=engine)
if ext in (".shp", ""):
# Shapefile does not support boolean, so is read back as int
if engine == "fiona":
# but since GDAL 3.9 supports boolean fields in SHP
if engine == "fiona" and fiona.gdal_version.minor < 9:
df["col"] = df["col"].astype("int64")
else:
elif engine == "pyogrio" and pyogrio.__gdal_version__ < (3, 9):
df["col"] = df["col"].astype("int32")
assert_geodataframe_equal(result, df)
# check the expected driver
@@ -189,15 +200,15 @@ eastern = pytz.timezone("America/New_York")
datetime_type_tests = (TEST_DATE, eastern.localize(TEST_DATE))
@pytest.mark.filterwarnings(
"ignore:Non-conformant content for record 1 in column b:RuntimeWarning"
) # for GPKG, GDAL writes the tz data but warns on reading (see DATETIME_FORMAT option)
@pytest.mark.parametrize(
"time", datetime_type_tests, ids=("naive_datetime", "datetime_with_timezone")
)
@pytest.mark.parametrize("driver,ext", driver_ext_pairs)
def test_to_file_datetime(tmpdir, driver, ext, time, engine):
"""Test writing a data file with the datetime column type"""
if engine == "pyogrio" and time.tzinfo is not None:
# TODO
pytest.skip("pyogrio doesn't yet support timezones")
if ext in (".shp", ""):
pytest.skip(f"Driver corresponding to ext {ext} doesn't support dt fields")
@@ -207,23 +218,25 @@ def test_to_file_datetime(tmpdir, driver, ext, time, engine):
df = GeoDataFrame(
{"a": [1.0, 2.0], "b": [time, time]}, geometry=[point, point], crs=4326
)
fiona_precision_limit = "ms"
df["b"] = df["b"].dt.round(freq=fiona_precision_limit)
df["b"] = df["b"].dt.round(freq="ms")
df.to_file(tempfilename, driver=driver, engine=engine)
df_read = read_file(tempfilename, engine=engine)
assert_geodataframe_equal(df.drop(columns=["b"]), df_read.drop(columns=["b"]))
# Check datetime column
expected = df["b"]
if PANDAS_GE_20:
expected = df["b"].dt.as_unit("ms")
actual = df_read["b"]
if df["b"].dt.tz is not None:
# US/Eastern becomes pytz.FixedOffset(-300) when read from file
# so compare fairly in terms of UTC
assert_series_equal(
df["b"].dt.tz_convert(pytz.utc), df_read["b"].dt.tz_convert(pytz.utc)
)
else:
if engine == "pyogrio" and PANDAS_GE_20:
df["b"] = df["b"].astype("datetime64[ms]")
assert_series_equal(df["b"], df_read["b"])
# as GDAL only models offsets, not timezones.
# Compare fair result in terms of UTC instead
expected = expected.dt.tz_convert(pytz.utc)
actual = actual.dt.tz_convert(pytz.utc)
assert_series_equal(expected, actual)
dt_exts = ["gpkg", "geojson"]
@@ -239,7 +252,7 @@ def write_invalid_date_file(date_str, tmpdir, ext, engine):
)
# Schema not required for GeoJSON since not typed, but needed for GPKG
if ext == "geojson":
df.to_file(tempfilename)
df.to_file(tempfilename, engine=engine)
else:
schema = {"geometry": "Point", "properties": {"date": "datetime"}}
if engine == "pyogrio" and not fiona:
@@ -254,7 +267,7 @@ def test_read_file_datetime_invalid(tmpdir, ext, engine):
# https://github.com/geopandas/geopandas/issues/2502
date_str = "9999-99-99T00:00:00" # invalid date handled by GDAL
tempfilename = write_invalid_date_file(date_str, tmpdir, ext, engine)
res = read_file(tempfilename)
res = read_file(tempfilename, engine=engine)
if ext == "gpkg":
assert is_datetime64_any_dtype(res["date"])
assert pd.isna(res["date"].iloc[-1])
@@ -265,16 +278,19 @@ def test_read_file_datetime_invalid(tmpdir, ext, engine):
@pytest.mark.parametrize("ext", dt_exts)
def test_read_file_datetime_out_of_bounds_ns(tmpdir, ext, engine):
if engine == "pyogrio" and not PANDAS_GE_20:
pytest.skip("with pyogrio requires pandas >= 2.0 to pass")
# https://github.com/geopandas/geopandas/issues/2502
if ext == "geojson":
skip_pyogrio_not_supported(engine)
date_str = "9999-12-31T00:00:00" # valid to GDAL, not to [ns] format
tempfilename = write_invalid_date_file(date_str, tmpdir, ext, engine)
res = read_file(tempfilename)
# Pandas invalid datetimes are read in as object dtype (strings)
assert res["date"].dtype == "object"
assert isinstance(res["date"].iloc[0], str)
res = read_file(tempfilename, engine=engine)
if PANDAS_GE_30:
assert res["date"].dtype == "datetime64[ms]"
assert res["date"].iloc[-1] == pd.Timestamp("9999-12-31 00:00:00")
else:
# Pandas invalid datetimes are read in as object dtype (strings)
assert res["date"].dtype == "object"
assert isinstance(res["date"].iloc[0], str)
def test_read_file_datetime_mixed_offsets(tmpdir):
@@ -292,17 +308,13 @@ def test_read_file_datetime_mixed_offsets(tmpdir):
df.to_file(tempfilename)
# check mixed tz don't crash GH2478
res = read_file(tempfilename)
if engine == "fiona":
# Convert mixed timezones to UTC equivalent
assert is_datetime64_any_dtype(res["date"])
if not PANDAS_GE_20:
utc = pytz.utc
else:
utc = datetime.timezone.utc
assert res["date"].dt.tz == utc
# Convert mixed timezones to UTC equivalent
assert is_datetime64_any_dtype(res["date"])
if not PANDAS_GE_20:
utc = pytz.utc
else:
# old fiona and pyogrio ignore timezones and read as datetimes successfully
assert is_datetime64_any_dtype(res["date"])
utc = datetime.timezone.utc
assert res["date"].dt.tz == utc
@pytest.mark.parametrize("driver,ext", driver_ext_pairs)
@@ -365,14 +377,21 @@ def test_to_file_int32(tmpdir, df_points, engine, driver, ext):
df = GeoDataFrame(geometry=geometry)
df["data"] = pd.array([1, np.nan] * 5, dtype=pd.Int32Dtype())
df.to_file(tempfilename, driver=driver, engine=engine)
df_read = GeoDataFrame.from_file(tempfilename, driver=driver, engine=engine)
assert_geodataframe_equal(df_read, df, check_dtype=False, check_like=True)
df_read = GeoDataFrame.from_file(tempfilename, engine=engine)
# the int column with missing values comes back as float
expected = df.copy()
expected["data"] = expected["data"].astype("float64")
assert_geodataframe_equal(df_read, expected, check_like=True)
tempfilename2 = os.path.join(str(tmpdir), f"int32_2.{ext}")
df2 = df.dropna()
df2.to_file(tempfilename2, driver=driver, engine=engine)
df2_read = GeoDataFrame.from_file(tempfilename2, engine=engine)
if engine == "pyogrio":
tempfilename2 = os.path.join(str(tmpdir), f"int32_2.{ext}")
df2 = df.dropna()
df2.to_file(tempfilename2, driver=driver, engine=engine)
df2_read = GeoDataFrame.from_file(tempfilename2, driver=driver, engine=engine)
assert df2_read["data"].dtype == "int32"
else:
# with the fiona engine the 32 bitwidth is not preserved
assert df2_read["data"].dtype == "int64"
@pytest.mark.parametrize("driver,ext", driver_ext_pairs)
@@ -382,8 +401,11 @@ def test_to_file_int64(tmpdir, df_points, engine, driver, ext):
df = GeoDataFrame(geometry=geometry)
df["data"] = pd.array([1, np.nan] * 5, dtype=pd.Int64Dtype())
df.to_file(tempfilename, driver=driver, engine=engine)
df_read = GeoDataFrame.from_file(tempfilename, driver=driver, engine=engine)
assert_geodataframe_equal(df_read, df, check_dtype=False, check_like=True)
df_read = GeoDataFrame.from_file(tempfilename, engine=engine)
# the int column with missing values comes back as float
expected = df.copy()
expected["data"] = expected["data"].astype("float64")
assert_geodataframe_equal(df_read, expected, check_like=True)
def test_to_file_empty(tmpdir, engine):
@@ -393,12 +415,6 @@ def test_to_file_empty(tmpdir, engine):
input_empty_df.to_file(tempfilename, engine=engine)
def test_to_file_privacy(tmpdir, df_nybb):
tempfilename = os.path.join(str(tmpdir), "test.shp")
with pytest.warns(FutureWarning):
geopandas.io.file.to_file(df_nybb, tempfilename)
def test_to_file_schema(tmpdir, df_nybb, engine):
"""
Ensure that the file is written according to the schema
@@ -431,12 +447,13 @@ def test_to_file_schema(tmpdir, df_nybb, engine):
assert result_schema == schema
def test_to_file_crs(tmpdir, engine):
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
def test_to_file_crs(tmpdir, engine, nybb_filename):
"""
Ensure that the file is written according to the crs
if it is specified
"""
df = read_file(geopandas.datasets.get_path("nybb"), engine=engine)
df = read_file(nybb_filename, engine=engine)
tempfilename = os.path.join(str(tmpdir), "crs.shp")
# save correct CRS
@@ -445,7 +462,7 @@ def test_to_file_crs(tmpdir, engine):
assert result.crs == df.crs
if engine == "pyogrio":
with pytest.raises(ValueError, match="Passing 'crs' it not supported"):
with pytest.raises(ValueError, match="Passing 'crs' is not supported"):
df.to_file(tempfilename, crs=3857, engine=engine)
return
@@ -455,8 +472,7 @@ def test_to_file_crs(tmpdir, engine):
assert result.crs == "epsg:3857"
# specify CRS for gdf without one
df2 = df.copy()
df2.crs = None
df2 = df.set_crs(None, allow_override=True)
df2.to_file(tempfilename, crs=2263, engine=engine)
df = GeoDataFrame.from_file(tempfilename, engine=engine)
assert df.crs == "epsg:2263"
@@ -529,6 +545,7 @@ def test_mode_unsupported(tmpdir, df_nybb, engine):
df_nybb.to_file(tempfilename, mode="r", engine=engine)
@pytest.mark.filterwarnings("ignore:'crs' was not provided:UserWarning:pyogrio")
@pytest.mark.parametrize("driver,ext", driver_ext_pairs)
def test_empty_crs(tmpdir, driver, ext, engine):
"""Test handling of undefined CRS with GPKG driver (GH #1975)."""
@@ -548,7 +565,7 @@ def test_empty_crs(tmpdir, driver, ext, engine):
if ext == ".geojson":
# geojson by default assumes epsg:4326
df.crs = "EPSG:4326"
df.geometry.array.crs = "EPSG:4326"
assert_geodataframe_equal(result, df)
@@ -561,10 +578,11 @@ def test_empty_crs(tmpdir, driver, ext, engine):
NYBB_CRS = "epsg:2263"
def test_read_file(engine):
df = read_file(geopandas.datasets.get_path("nybb"), engine=engine)
def test_read_file(engine, nybb_filename):
df = read_file(nybb_filename, engine=engine)
validate_boro_df(df)
assert df.crs == NYBB_CRS
if HAS_PYPROJ:
assert df.crs == NYBB_CRS
expected_columns = ["BoroCode", "BoroName", "Shape_Leng", "Shape_Area"]
assert (df.columns[:-1] == expected_columns).all()
@@ -578,7 +596,7 @@ def test_read_file(engine):
"main/geopandas/tests/data/null_geom.geojson",
# url to zip file
"https://raw.githubusercontent.com/geopandas/geopandas/"
"main/geopandas/datasets/nybb_16a.zip",
"main/geopandas/tests/data/nybb_16a.zip",
# url to zipfile without extension
"https://geonode.goosocean.org/download/480",
# url to web service
@@ -596,6 +614,25 @@ def test_read_file_local_uri(file_path, engine):
assert isinstance(gdf, geopandas.GeoDataFrame)
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
def test_read_file_geojson_string_path(engine):
if engine == "pyogrio" and not PYOGRIO_GE_090:
pytest.skip("fixed in pyogrio 0.9.0")
expected = GeoDataFrame({"val_with_hash": ["row # 0"], "geometry": [Point(0, 1)]})
features = {
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {"val_with_hash": "row # 0"},
"geometry": {"type": "Point", "coordinates": [0.0, 1.0]},
}
],
}
df_read = read_file(json.dumps(features))
assert_geodataframe_equal(expected.set_crs("EPSG:4326"), df_read)
def test_read_file_textio(file_path, engine):
file_text_stream = open(file_path)
file_stringio = io.StringIO(open(file_path).read())
@@ -648,11 +685,11 @@ def test_read_file_tempfile(engine):
temp.close()
def test_read_binary_file_fsspec(engine):
def test_read_binary_file_fsspec(engine, nybb_filename):
fsspec = pytest.importorskip("fsspec")
# Remove the zip scheme so fsspec doesn't open as a zipped file,
# instead we want to read as bytes and let fiona decode it.
path = geopandas.datasets.get_path("nybb")[6:]
path = nybb_filename[6:]
with fsspec.open(path, "rb") as f:
gdf = read_file(f, engine=engine)
assert isinstance(gdf, geopandas.GeoDataFrame)
@@ -665,10 +702,10 @@ def test_read_text_file_fsspec(file_path, engine):
assert isinstance(gdf, geopandas.GeoDataFrame)
def test_infer_zipped_file(engine):
def test_infer_zipped_file(engine, nybb_filename):
# Remove the zip scheme so that the test for a zipped file can
# check it and add it back.
path = geopandas.datasets.get_path("nybb")[6:]
path = nybb_filename[6:]
gdf = read_file(path, engine=engine)
assert isinstance(gdf, geopandas.GeoDataFrame)
@@ -683,15 +720,24 @@ def test_infer_zipped_file(engine):
assert isinstance(gdf, geopandas.GeoDataFrame)
def test_allow_legacy_gdal_path(engine):
def test_allow_legacy_gdal_path(engine, nybb_filename):
# Construct a GDAL-style zip path.
path = "/vsizip/" + geopandas.datasets.get_path("nybb")[6:]
path = "/vsizip/" + nybb_filename[6:]
gdf = read_file(path, engine=engine)
assert isinstance(gdf, geopandas.GeoDataFrame)
def test_read_file_filtered__bbox(df_nybb, engine):
nybb_filename = geopandas.datasets.get_path("nybb")
@pytest.mark.skipif(not PYOGRIO_GE_090, reason="bug fixed in pyogrio 0.9.0")
def test_read_file_with_hash_in_path(engine, nybb_filename, tmp_path):
folder_with_hash = tmp_path / "path with # present"
folder_with_hash.mkdir(exist_ok=True, parents=True)
read_path = folder_with_hash / "nybb.zip"
shutil.copy(nybb_filename[6:], read_path)
gdf = read_file(read_path, engine=engine)
assert isinstance(gdf, geopandas.GeoDataFrame)
def test_read_file_bbox_tuple(df_nybb, engine, nybb_filename):
bbox = (
1031051.7879884212,
224272.49231459625,
@@ -703,8 +749,7 @@ def test_read_file_filtered__bbox(df_nybb, engine):
assert_geodataframe_equal(filtered_df, expected.reset_index(drop=True))
def test_read_file_filtered__bbox__polygon(df_nybb, engine):
nybb_filename = geopandas.datasets.get_path("nybb")
def test_read_file_bbox_polygon(df_nybb, engine, nybb_filename):
bbox = box(
1031051.7879884212, 224272.49231459625, 1047224.3104931959, 244317.30894023244
)
@@ -713,14 +758,12 @@ def test_read_file_filtered__bbox__polygon(df_nybb, engine):
assert_geodataframe_equal(filtered_df, expected.reset_index(drop=True))
def test_read_file_filtered__rows(df_nybb, engine):
nybb_filename = geopandas.datasets.get_path("nybb")
def test_read_file_filtered__rows(df_nybb, engine, nybb_filename):
filtered_df = read_file(nybb_filename, rows=1, engine=engine)
assert_geodataframe_equal(filtered_df, df_nybb.iloc[[0], :])
def test_read_file_filtered__rows_slice(df_nybb, engine):
nybb_filename = geopandas.datasets.get_path("nybb")
def test_read_file_filtered__rows_slice(df_nybb, engine, nybb_filename):
filtered_df = read_file(nybb_filename, rows=slice(1, 3), engine=engine)
assert_geodataframe_equal(filtered_df, df_nybb.iloc[1:3, :].reset_index(drop=True))
@@ -728,21 +771,14 @@ def test_read_file_filtered__rows_slice(df_nybb, engine):
@pytest.mark.filterwarnings(
"ignore:Layer does not support OLC_FASTFEATURECOUNT:RuntimeWarning"
) # for the slice with -1
def test_read_file_filtered__rows_bbox(df_nybb, engine):
nybb_filename = geopandas.datasets.get_path("nybb")
def test_read_file_filtered__rows_bbox(df_nybb, engine, nybb_filename):
bbox = (
1031051.7879884212,
224272.49231459625,
1047224.3104931959,
244317.30894023244,
)
if engine == "pyogrio" and not PYOGRIO_GE_07:
with pytest.raises(ValueError, match="'skip_features' must be between 0 and 1"):
# combination bbox and rows (rows slice applied after bbox filtering!)
filtered_df = read_file(
nybb_filename, bbox=bbox, rows=slice(4, None), engine=engine
)
else: # fiona
if engine == "fiona":
# combination bbox and rows (rows slice applied after bbox filtering!)
filtered_df = read_file(
nybb_filename, bbox=bbox, rows=slice(4, None), engine=engine
@@ -768,16 +804,14 @@ def test_read_file_filtered__rows_bbox(df_nybb, engine):
)
def test_read_file_filtered_rows_invalid(engine):
def test_read_file_filtered_rows_invalid(engine, nybb_filename):
with pytest.raises(TypeError):
read_file(
geopandas.datasets.get_path("nybb"), rows="not_a_slice", engine=engine
)
read_file(nybb_filename, rows="not_a_slice", engine=engine)
def test_read_file__ignore_geometry(engine):
def test_read_file__ignore_geometry(engine, naturalearth_lowres):
pdf = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres"),
naturalearth_lowres,
ignore_geometry=True,
engine=engine,
)
@@ -785,20 +819,73 @@ def test_read_file__ignore_geometry(engine):
assert isinstance(pdf, pd.DataFrame) and not isinstance(pdf, geopandas.GeoDataFrame)
def test_read_file__ignore_all_fields(engine):
skip_pyogrio_not_supported(engine) # pyogrio has "columns" keyword instead
@pytest.mark.filterwarnings(
"ignore:The 'include_fields' and 'ignore_fields' keywords:DeprecationWarning"
)
def test_read_file__ignore_fields(engine, naturalearth_lowres):
gdf = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres"),
naturalearth_lowres,
ignore_fields=["pop_est", "continent", "iso_a3", "gdp_md_est"],
engine=engine,
)
assert gdf.columns.tolist() == ["name", "geometry"]
@pytest.mark.filterwarnings(
"ignore:The 'include_fields' and 'ignore_fields' keywords:DeprecationWarning"
)
def test_read_file__ignore_all_fields(engine, naturalearth_lowres):
gdf = geopandas.read_file(
naturalearth_lowres,
ignore_fields=["pop_est", "continent", "name", "iso_a3", "gdp_md_est"],
engine="fiona",
engine=engine,
)
assert gdf.columns.tolist() == ["geometry"]
def test_read_file__where_filter(engine):
def test_read_file_missing_geometry(tmpdir, engine):
filename = str(tmpdir / "test.csv")
expected = pd.DataFrame(
{"col1": np.array([1, 2, 3], dtype="int64"), "col2": ["a", "b", "c"]}
)
expected.to_csv(filename, index=False)
df = geopandas.read_file(filename, engine=engine)
# both engines read integers as strings; force back to original type
df["col1"] = df["col1"].astype("int64")
assert isinstance(df, pd.DataFrame)
assert not isinstance(df, geopandas.GeoDataFrame)
assert_frame_equal(df, expected)
def test_read_file_None_attribute(tmp_path, engine):
# Test added in context of https://github.com/geopandas/geopandas/issues/2901
test_path = tmp_path / "test.gpkg"
gdf = GeoDataFrame(
{"a": [None, None]}, geometry=[Point(1, 2), Point(3, 4)], crs=4326
)
gdf.to_file(test_path, engine=engine)
read_gdf = read_file(test_path, engine=engine)
assert_geodataframe_equal(gdf, read_gdf)
def test_read_csv_dtype(tmpdir, df_nybb):
filename = str(tmpdir / "test.csv")
df_nybb.to_csv(filename, index=False)
pdf = pd.read_csv(filename, dtype={"geometry": "geometry"})
assert pdf.geometry.dtype == "geometry"
def test_read_file__where_filter(engine, naturalearth_lowres):
if FIONA_GE_19 or engine == "pyogrio":
gdf = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres"),
naturalearth_lowres,
where="continent='Africa'",
engine=engine,
)
@@ -806,26 +893,75 @@ def test_read_file__where_filter(engine):
else:
with pytest.raises(NotImplementedError):
geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres"),
naturalearth_lowres,
where="continent='Africa'",
engine="fiona",
)
@PYOGRIO_MARK
def test_read_file__columns():
# TODO: this is only support for pyogrio, but we could mimic it for fiona as well
def test_read_file__columns(engine, naturalearth_lowres):
if engine == "fiona" and not FIONA_GE_19:
pytest.skip("columns requires fiona 1.9+")
gdf = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres"),
columns=["name", "pop_est"],
engine="pyogrio",
naturalearth_lowres, columns=["name", "pop_est"], engine=engine
)
assert gdf.columns.tolist() == ["name", "pop_est", "geometry"]
def test_read_file_filtered_with_gdf_boundary(df_nybb, engine):
def test_read_file__columns_empty(engine, naturalearth_lowres):
if engine == "fiona" and not FIONA_GE_19:
pytest.skip("columns requires fiona 1.9+")
gdf = geopandas.read_file(naturalearth_lowres, columns=[], engine=engine)
assert gdf.columns.tolist() == ["geometry"]
@pytest.mark.skipif(FIONA_GE_19 or not fiona, reason="test for fiona < 1.9")
def test_read_file__columns_old_fiona(naturalearth_lowres):
with pytest.raises(NotImplementedError):
geopandas.read_file(
naturalearth_lowres, columns=["name", "pop_est"], engine="fiona"
)
@pytest.mark.filterwarnings(
"ignore:The 'include_fields' and 'ignore_fields' keywords:DeprecationWarning"
)
def test_read_file__include_fields(engine, naturalearth_lowres):
if engine == "fiona" and not FIONA_GE_19:
pytest.skip("columns requires fiona 1.9+")
gdf = geopandas.read_file(
naturalearth_lowres, include_fields=["name", "pop_est"], engine=engine
)
assert gdf.columns.tolist() == ["name", "pop_est", "geometry"]
@pytest.mark.skipif(not FIONA_GE_19, reason="columns requires fiona 1.9+")
def test_read_file__columns_conflicting_keywords(engine, naturalearth_lowres):
path = naturalearth_lowres
with pytest.raises(ValueError, match="Cannot specify both"):
geopandas.read_file(
path, include_fields=["name"], ignore_fields=["pop_est"], engine=engine
)
with pytest.raises(ValueError, match="Cannot specify both"):
geopandas.read_file(
path, columns=["name"], include_fields=["pop_est"], engine=engine
)
with pytest.raises(ValueError, match="Cannot specify both"):
geopandas.read_file(
path, columns=["name"], ignore_fields=["pop_est"], engine=engine
)
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
@pytest.mark.parametrize("file_like", [False, True])
def test_read_file_bbox_gdf(df_nybb, engine, nybb_filename, file_like):
full_df_shape = df_nybb.shape
nybb_filename = geopandas.datasets.get_path("nybb")
bbox = geopandas.GeoDataFrame(
geometry=[
box(
@@ -837,28 +973,41 @@ def test_read_file_filtered_with_gdf_boundary(df_nybb, engine):
],
crs=NYBB_CRS,
)
filtered_df = read_file(nybb_filename, bbox=bbox, engine=engine)
infile = (
open(nybb_filename.replace("zip://", ""), "rb") if file_like else nybb_filename
)
filtered_df = read_file(infile, bbox=bbox, engine=engine)
filtered_df_shape = filtered_df.shape
assert full_df_shape != filtered_df_shape
assert filtered_df_shape == (2, 5)
def test_read_file_filtered_with_gdf_boundary__mask(df_nybb, engine):
skip_pyogrio_not_supported(engine)
gdf_mask = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres"))
gdf = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_cities"),
mask=gdf_mask[gdf_mask.continent == "Africa"],
engine=engine,
)
filtered_df_shape = gdf.shape
assert filtered_df_shape == (57, 2)
def test_read_file_filtered_with_gdf_boundary__mask__polygon(df_nybb, engine):
skip_pyogrio_not_supported(engine)
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
@pytest.mark.parametrize("file_like", [False, True])
def test_read_file_mask_gdf(df_nybb, engine, nybb_filename, file_like):
full_df_shape = df_nybb.shape
mask = geopandas.GeoDataFrame(
geometry=[
box(
1031051.7879884212,
224272.49231459625,
1047224.3104931959,
244317.30894023244,
)
],
crs=NYBB_CRS,
)
infile = (
open(nybb_filename.replace("zip://", ""), "rb") if file_like else nybb_filename
)
filtered_df = read_file(infile, mask=mask, engine=engine)
filtered_df_shape = filtered_df.shape
assert full_df_shape != filtered_df_shape
assert filtered_df_shape == (2, 5)
def test_read_file_mask_polygon(df_nybb, engine, nybb_filename):
full_df_shape = df_nybb.shape
nybb_filename = geopandas.datasets.get_path("nybb")
mask = box(
1031051.7879884212, 224272.49231459625, 1047224.3104931959, 244317.30894023244
)
@@ -868,10 +1017,25 @@ def test_read_file_filtered_with_gdf_boundary__mask__polygon(df_nybb, engine):
assert filtered_df_shape == (2, 5)
def test_read_file_filtered_with_gdf_boundary_mismatched_crs(df_nybb, engine):
skip_pyogrio_not_supported(engine)
def test_read_file_mask_geojson(df_nybb, nybb_filename, engine):
full_df_shape = df_nybb.shape
mask = mapping(
box(
1031051.7879884212,
224272.49231459625,
1047224.3104931959,
244317.30894023244,
)
)
filtered_df = read_file(nybb_filename, mask=mask, engine=engine)
filtered_df_shape = filtered_df.shape
assert full_df_shape != filtered_df_shape
assert filtered_df_shape == (2, 5)
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
def test_read_file_bbox_gdf_mismatched_crs(df_nybb, engine, nybb_filename):
full_df_shape = df_nybb.shape
nybb_filename = geopandas.datasets.get_path("nybb")
bbox = geopandas.GeoDataFrame(
geometry=[
box(
@@ -890,10 +1054,9 @@ def test_read_file_filtered_with_gdf_boundary_mismatched_crs(df_nybb, engine):
assert filtered_df_shape == (2, 5)
def test_read_file_filtered_with_gdf_boundary_mismatched_crs__mask(df_nybb, engine):
skip_pyogrio_not_supported(engine)
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
def test_read_file_mask_gdf_mismatched_crs(df_nybb, engine, nybb_filename):
full_df_shape = df_nybb.shape
nybb_filename = geopandas.datasets.get_path("nybb")
mask = geopandas.GeoDataFrame(
geometry=[
box(
@@ -912,6 +1075,20 @@ def test_read_file_filtered_with_gdf_boundary_mismatched_crs__mask(df_nybb, engi
assert filtered_df_shape == (2, 5)
def test_read_file_bbox_mask_not_allowed(engine, nybb_filename):
bbox = (
1031051.7879884212,
224272.49231459625,
1047224.3104931959,
244317.30894023244,
)
mask = box(*bbox)
with pytest.raises(ValueError, match="mask and bbox can not be set together"):
read_file(nybb_filename, bbox=bbox, mask=mask)
@pytest.mark.filterwarnings(
"ignore:Layer 'b'test_empty'' does not have any features:UserWarning"
)
@@ -942,11 +1119,6 @@ def test_read_file_empty_shapefile(tmpdir, engine):
assert all(empty.columns == ["A", "Z", "geometry"])
def test_read_file_privacy(tmpdir, df_nybb):
with pytest.warns(FutureWarning):
geopandas.io.file.read_file(geopandas.datasets.get_path("nybb"))
class FileNumber(object):
def __init__(self, tmpdir, base, ext):
self.tmpdir = str(tmpdir)
@@ -1113,7 +1285,7 @@ def test_write_index_to_file(tmpdir, df_points, driver, ext, engine):
# index as string
df_p = df_points.copy()
df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry)
df.index = pd.TimedeltaIndex(range(len(df)), "days")
df.index = pd.to_timedelta(range(len(df)), unit="days")
# TODO: TimedeltaIndex is an invalid field type
df.index = df.index.astype(str)
do_checks(df, index_is_used=True)
@@ -1121,7 +1293,7 @@ def test_write_index_to_file(tmpdir, df_points, driver, ext, engine):
# unnamed DatetimeIndex
df_p = df_points.copy()
df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry)
df.index = pd.TimedeltaIndex(range(len(df)), "days") + pd.DatetimeIndex(
df.index = pd.to_timedelta(range(len(df)), unit="days") + pd.to_datetime(
["1999-12-27"] * len(df)
)
if driver == "ESRI Shapefile":
@@ -1152,6 +1324,54 @@ def test_write_read_file(test_file, engine):
os.remove(os.path.expanduser(test_file))
@pytest.mark.skipif(fiona is False, reason="Fiona not available")
@pytest.mark.skipif(FIONA_GE_19, reason="Fiona >= 1.9 supports metadata")
def test_to_file_metadata_unsupported_fiona_version(tmp_path, df_points):
metadata = {"title": "test"}
tmp_file = tmp_path / "test.gpkg"
match = "'metadata' keyword is only supported for Fiona >= 1.9"
with pytest.raises(NotImplementedError, match=match):
df_points.to_file(tmp_file, driver="GPKG", engine="fiona", metadata=metadata)
@pytest.mark.skipif(not FIONA_GE_19, reason="only Fiona >= 1.9 supports metadata")
def test_to_file_metadata_supported_fiona_version(tmp_path, df_points):
metadata = {"title": "test"}
tmp_file = tmp_path / "test.gpkg"
df_points.to_file(tmp_file, driver="GPKG", engine="fiona", metadata=metadata)
# Check that metadata is written to the file
with fiona.open(tmp_file) as src:
tags = src.tags()
assert tags == metadata
@pytest.mark.skipif(pyogrio is False, reason="Pyogrio not available")
def test_to_file_metadata_pyogrio(tmp_path, df_points):
metadata = {"title": "test"}
tmp_file = tmp_path / "test.gpkg"
df_points.to_file(tmp_file, driver="GPKG", engine="pyogrio", metadata=metadata)
# Check that metadata is written to the file
info = pyogrio.read_info(tmp_file)
layer_metadata = info["layer_metadata"]
assert layer_metadata == metadata
@pytest.mark.parametrize(
"driver, ext", [("ESRI Shapefile", ".shp"), ("GeoJSON", ".geojson")]
)
def test_to_file_metadata_unsupported_driver(driver, ext, tmpdir, df_points, engine):
metadata = {"title": "Test"}
tempfilename = os.path.join(str(tmpdir), "test" + ext)
with pytest.raises(
NotImplementedError, match="'metadata' keyword is only supported for"
):
df_points.to_file(tempfilename, driver=driver, metadata=metadata)
def test_multiple_geom_cols_error(tmpdir, df_nybb):
df_nybb["geom2"] = df_nybb.geometry
with pytest.raises(ValueError, match="GeoDataFrame contains multiple geometry"):
@@ -1160,7 +1380,7 @@ def test_multiple_geom_cols_error(tmpdir, df_nybb):
@PYOGRIO_MARK
@FIONA_MARK
def test_option_io_engine():
def test_option_io_engine(nybb_filename):
try:
geopandas.options.io_engine = "pyogrio"
@@ -1171,8 +1391,48 @@ def test_option_io_engine():
orig = fiona.supported_drivers["ESRI Shapefile"]
fiona.supported_drivers["ESRI Shapefile"] = "w"
nybb_filename = geopandas.datasets.get_path("nybb")
_ = geopandas.read_file(nybb_filename)
finally:
fiona.supported_drivers["ESRI Shapefile"] = orig
geopandas.options.io_engine = None
@pytest.mark.skipif(pyogrio, reason="test for pyogrio not installed")
def test_error_engine_unavailable_pyogrio(tmp_path, df_points, file_path):
    """Both read and write must raise ImportError when pyogrio is missing."""
    with pytest.raises(ImportError, match="the 'read_file' function requires"):
        geopandas.read_file(file_path, engine="pyogrio")

    with pytest.raises(ImportError, match="the 'to_file' method requires"):
        df_points.to_file(tmp_path / "test.gpkg", engine="pyogrio")
@pytest.mark.skipif(fiona, reason="test for fiona not installed")
def test_error_engine_unavailable_fiona(tmp_path, df_points, file_path):
    """Both read and write must raise ImportError when fiona is missing."""
    with pytest.raises(ImportError, match="the 'read_file' function requires"):
        geopandas.read_file(file_path, engine="fiona")

    with pytest.raises(ImportError, match="the 'to_file' method requires"):
        df_points.to_file(tmp_path / "test.gpkg", engine="fiona")
@PYOGRIO_MARK
def test_list_layers(df_points, tmpdir):
    """``geopandas.list_layers`` reports every layer and its geometry type."""
    dataset_path = os.path.join(str(tmpdir), "dataset.gpkg")

    # Three spatial layers, each with a distinct geometry type.
    df_points.to_file(dataset_path, layer="original")
    df_points.set_geometry(df_points.buffer(1)).to_file(dataset_path, layer="buffered")
    df_points.set_geometry(df_points.buffer(2).boundary).to_file(
        dataset_path, layer="boundary"
    )
    # One attribute-only layer: its geometry_type should come back as None.
    pyogrio.write_dataframe(
        df_points[["value1", "value2"]], dataset_path, layer="non-spatial"
    )

    expected = pd.DataFrame(
        {
            "name": ["original", "buffered", "boundary", "non-spatial"],
            "geometry_type": ["Point", "Polygon", "LineString", None],
        }
    )
    assert_frame_equal(geopandas.list_layers(dataset_path), expected)

View File

@@ -12,11 +12,10 @@ from shapely.geometry import (
import geopandas
from geopandas import GeoDataFrame
from geopandas.testing import assert_geodataframe_equal
import pytest
from .test_file import FIONA_MARK, PYOGRIO_MARK
import pytest
from geopandas.testing import assert_geodataframe_equal
# Credit: Polygons below come from Montreal city Open Data portal
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
@@ -244,7 +243,14 @@ def geodataframe(request):
return request.param
@pytest.fixture(params=["GeoJSON", "ESRI Shapefile", "GPKG", "SQLite"])
@pytest.fixture(
params=[
("GeoJSON", ".geojson"),
("ESRI Shapefile", ".shp"),
("GPKG", ".gpkg"),
("SQLite", ".sqlite"),
]
)
def ogr_driver(request):
return request.param
@@ -260,16 +266,18 @@ def engine(request):
def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver, engine):
output_file = os.path.join(str(tmpdir), "output_file")
driver, ext = ogr_driver
output_file = os.path.join(str(tmpdir), "output_file" + ext)
write_kwargs = {}
if ogr_driver == "SQLite":
if driver == "SQLite":
write_kwargs["spatialite"] = True
# This if statement can be removed once minimal fiona version >= 1.8.20
if engine == "fiona":
import fiona
from packaging.version import Version
import fiona
if Version(fiona.__version__) < Version("1.8.20"):
pytest.skip("SQLite driver only available from version 1.8.20")
@@ -285,22 +293,35 @@ def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver, engine):
):
write_kwargs["geometry_type"] = "Point Z"
expected_error = _expected_error_on(geodataframe, ogr_driver)
expected_error = _expected_error_on(geodataframe, driver)
if expected_error:
with pytest.raises(
RuntimeError, match="Failed to write record|Could not add feature to layer"
):
geodataframe.to_file(
output_file, driver=ogr_driver, engine=engine, **write_kwargs
output_file, driver=driver, engine=engine, **write_kwargs
)
else:
geodataframe.to_file(
output_file, driver=ogr_driver, engine=engine, **write_kwargs
)
if driver == "SQLite" and engine == "pyogrio":
try:
geodataframe.to_file(
output_file, driver=driver, engine=engine, **write_kwargs
)
except ValueError as e:
if "unrecognized option 'SPATIALITE'" in str(e):
pytest.xfail(
"pyogrio wheels from PyPI do not come with SpatiaLite support. "
f"Error: {e}"
)
raise
else:
geodataframe.to_file(
output_file, driver=driver, engine=engine, **write_kwargs
)
reloaded = geopandas.read_file(output_file, engine=engine)
if ogr_driver == "GeoJSON" and engine == "pyogrio":
if driver == "GeoJSON" and engine == "pyogrio":
# For GeoJSON files, the int64 column comes back as int32
reloaded["a"] = reloaded["a"].astype("int64")

View File

@@ -1,5 +1,8 @@
from collections import OrderedDict
import numpy as np
import pandas as pd
from shapely.geometry import (
LineString,
MultiLineString,
@@ -9,12 +12,11 @@ from shapely.geometry import (
Polygon,
)
import pandas as pd
import pytest
import numpy as np
from geopandas import GeoDataFrame
from geopandas.io.file import infer_schema
import pytest
# Credit: Polygons below come from Montreal city Open Data portal
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
city_hall_boundaries = Polygon(

View File

@@ -2,7 +2,7 @@
See generate_legacy_storage_files.py for the creation of the legacy files.
"""
from contextlib import contextmanager
import glob
import os
import pathlib
@@ -11,9 +11,6 @@ import pandas as pd
import pytest
from geopandas.testing import assert_geodataframe_equal
from geopandas import _compat as compat
import geopandas
from shapely.geometry import Point
DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"
@@ -34,18 +31,7 @@ def legacy_pickle(request):
return request.param
@contextmanager
def with_use_pygeos(option):
orig = geopandas.options.use_pygeos
geopandas.options.use_pygeos = option
try:
yield
finally:
geopandas.options.use_pygeos = orig
@pytest.mark.skipif(
compat.USE_SHAPELY_20 or compat.USE_PYGEOS,
@pytest.mark.skip(
reason=(
"shapely 2.0/pygeos-based unpickling currently only works for "
"shapely-2.0/pygeos-written files"
@@ -68,43 +54,3 @@ def test_round_trip_current(tmpdir, current_pickle_data):
result = pd.read_pickle(path)
assert_geodataframe_equal(result, value)
assert isinstance(result.has_sindex, bool)
def _create_gdf():
return geopandas.GeoDataFrame(
{"a": [0.1, 0.2, 0.3], "geometry": [Point(1, 1), Point(2, 2), Point(3, 3)]},
crs="EPSG:4326",
)
@pytest.mark.skipif(not compat.HAS_PYGEOS, reason="requires pygeos to test #1745")
def test_pygeos_switch(tmpdir):
# writing and reading with pygeos disabled
with with_use_pygeos(False):
gdf = _create_gdf()
path = str(tmpdir / "gdf_crs1.pickle")
gdf.to_pickle(path)
result = pd.read_pickle(path)
assert_geodataframe_equal(result, gdf)
# writing without pygeos, reading with pygeos
with with_use_pygeos(False):
gdf = _create_gdf()
path = str(tmpdir / "gdf_crs1.pickle")
gdf.to_pickle(path)
with with_use_pygeos(True):
result = pd.read_pickle(path)
gdf = _create_gdf()
assert_geodataframe_equal(result, gdf)
# writing with pygeos, reading without pygeos
with with_use_pygeos(True):
gdf = _create_gdf()
path = str(tmpdir / "gdf_crs1.pickle")
gdf.to_pickle(path)
with with_use_pygeos(False):
result = pd.read_pickle(path)
gdf = _create_gdf()
assert_geodataframe_equal(result, gdf)

View File

@@ -4,18 +4,27 @@ The spatial database tests may not work without additional system
configuration. postGIS tests require a test database to have been setup;
see geopandas.tests.util for more information.
"""
import os
import warnings
from importlib.util import find_spec
import pandas as pd
import geopandas
from geopandas import GeoDataFrame, read_file, read_postgis
import geopandas._compat as compat
from geopandas.io.sql import _get_conn as get_conn, _write_postgis as write_postgis
from geopandas.tests.util import create_postgis, create_spatialite, validate_boro_df
from geopandas import GeoDataFrame, read_file, read_postgis
from geopandas._compat import HAS_PYPROJ
from geopandas.io.sql import _get_conn as get_conn
from geopandas.io.sql import _write_postgis as write_postgis
import pytest
from geopandas.tests.util import (
create_postgis,
create_spatialite,
mock,
validate_boro_df,
)
try:
from sqlalchemy import text
@@ -26,31 +35,48 @@ except ImportError:
@pytest.fixture
def df_nybb():
nybb_path = geopandas.datasets.get_path("nybb")
df = read_file(nybb_path)
def df_nybb(nybb_filename):
df = read_file(nybb_filename)
return df
@pytest.fixture()
def connection_postgis():
def check_available_postgis_drivers() -> list[str]:
"""Work out which of psycopg2 and psycopg are available.
This prevents tests running if the relevant package isn't installed
(rather than being skipped, as skips are treated as failures during postgis CI)
"""
Initiates a connection to a postGIS database that must already exist.
See create_postgis for more information.
"""
psycopg2 = pytest.importorskip("psycopg2")
from psycopg2 import OperationalError
drivers = []
if find_spec("psycopg"):
drivers.append("psycopg")
if find_spec("psycopg2"):
drivers.append("psycopg2")
return drivers
POSTGIS_DRIVERS = check_available_postgis_drivers()
def prepare_database_credentials() -> dict:
    """Gather postgres connection credentials from environment variables."""
    env = os.environ.get
    # Database name is fixed for the test suite; everything else comes from
    # the standard PG* environment variables (None when unset).
    return {
        "dbname": "test_geopandas",
        "user": env("PGUSER"),
        "password": env("PGPASSWORD"),
        "host": env("PGHOST"),
        "port": env("PGPORT"),
    }
@pytest.fixture()
def connection_postgis(request):
"""Create a postgres connection using either psycopg2 or psycopg.
Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS."""
psycopg = pytest.importorskip(request.param)
dbname = "test_geopandas"
user = os.environ.get("PGUSER")
password = os.environ.get("PGPASSWORD")
host = os.environ.get("PGHOST")
port = os.environ.get("PGPORT")
try:
con = psycopg2.connect(
dbname=dbname, user=user, password=password, host=host, port=port
)
except OperationalError:
con = psycopg.connect(**prepare_database_credentials())
except psycopg.OperationalError:
pytest.skip("Cannot connect with postgresql database")
with warnings.catch_warnings():
warnings.filterwarnings(
@@ -61,28 +87,25 @@ def connection_postgis():
@pytest.fixture()
def engine_postgis():
def engine_postgis(request):
"""
Initiates a connection engine to a postGIS database that must already exist.
Initiate a sqlalchemy connection engine using either psycopg2 or psycopg.
Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS.
"""
sqlalchemy = pytest.importorskip("sqlalchemy")
from sqlalchemy.engine.url import URL
user = os.environ.get("PGUSER")
password = os.environ.get("PGPASSWORD")
host = os.environ.get("PGHOST")
port = os.environ.get("PGPORT")
dbname = "test_geopandas"
credentials = prepare_database_credentials()
try:
con = sqlalchemy.create_engine(
URL.create(
drivername="postgresql+psycopg2",
username=user,
database=dbname,
password=password,
host=host,
port=port,
drivername=f"postgresql+{request.param}",
username=credentials["user"],
database=credentials["dbname"],
password=credentials["password"],
host=credentials["host"],
port=credentials["port"],
)
)
con.connect()
@@ -140,7 +163,7 @@ def drop_table_if_exists(conn_or_engine, table):
@pytest.fixture
def df_mixed_single_and_multi():
from shapely.geometry import Point, LineString, MultiLineString
from shapely.geometry import LineString, MultiLineString, Point
df = geopandas.GeoDataFrame(
{
@@ -157,7 +180,7 @@ def df_mixed_single_and_multi():
@pytest.fixture
def df_geom_collection():
from shapely.geometry import Point, LineString, Polygon, GeometryCollection
from shapely.geometry import GeometryCollection, LineString, Point, Polygon
df = geopandas.GeoDataFrame(
{
@@ -188,7 +211,7 @@ def df_linear_ring():
@pytest.fixture
def df_3D_geoms():
from shapely.geometry import Point, LineString, Polygon
from shapely.geometry import LineString, Point, Polygon
df = geopandas.GeoDataFrame(
{
@@ -204,6 +227,7 @@ def df_3D_geoms():
class TestIO:
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_get_conn(self, engine_postgis):
Connection = pytest.importorskip("sqlalchemy.engine.base").Connection
@@ -217,6 +241,7 @@ class TestIO:
with get_conn(object()):
pass
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_default(self, connection_postgis, df_nybb):
con = connection_postgis
create_postgis(con, df_nybb)
@@ -229,6 +254,7 @@ class TestIO:
# by user; should not be set to 0, as from get_srid failure
assert df.crs is None
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_custom_geom_col(self, connection_postgis, df_nybb):
con = connection_postgis
geom_col = "the_geom"
@@ -239,6 +265,7 @@ class TestIO:
validate_boro_df(df)
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_select_geom_as(self, connection_postgis, df_nybb):
"""Tests that a SELECT {geom} AS {some_other_geom} works."""
con = connection_postgis
@@ -254,6 +281,7 @@ class TestIO:
validate_boro_df(df)
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_get_srid(self, connection_postgis, df_nybb):
"""Tests that an SRID can be read from a geodatabase (GH #451)."""
con = connection_postgis
@@ -267,6 +295,7 @@ class TestIO:
validate_boro_df(df)
assert df.crs == crs
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_override_srid(self, connection_postgis, df_nybb):
"""Tests that a user specified CRS overrides the geodatabase SRID."""
con = connection_postgis
@@ -279,6 +308,7 @@ class TestIO:
validate_boro_df(df)
assert df.crs == orig_crs
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_from_postgis_default(self, connection_postgis, df_nybb):
con = connection_postgis
create_postgis(con, df_nybb)
@@ -288,6 +318,7 @@ class TestIO:
validate_boro_df(df, case_sensitive=False)
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_from_postgis_custom_geom_col(self, connection_postgis, df_nybb):
con = connection_postgis
geom_col = "the_geom"
@@ -323,6 +354,7 @@ class TestIO:
df = read_postgis(sql, con, geom_col=geom_col)
validate_boro_df(df)
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_chunksize(self, connection_postgis, df_nybb):
"""Test chunksize argument"""
chunksize = 2
@@ -337,14 +369,7 @@ class TestIO:
# by user; should not be set to 0, as from get_srid failure
assert df.crs is None
def test_read_postgis_privacy(self, connection_postgis, df_nybb):
con = connection_postgis
create_postgis(con, df_nybb)
sql = "SELECT * FROM nybb;"
with pytest.warns(FutureWarning):
geopandas.io.sql.read_postgis(sql, con)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_default(self, engine_postgis, df_nybb):
"""Tests that GeoDataFrame can be written to PostGIS with defaults."""
engine = engine_postgis
@@ -360,6 +385,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_uppercase_tablename(self, engine_postgis, df_nybb):
"""Tests writing GeoDataFrame to PostGIS with uppercase tablename."""
engine = engine_postgis
@@ -375,6 +401,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_sqlalchemy_connection(self, engine_postgis, df_nybb):
"""Tests that GeoDataFrame can be written to PostGIS with defaults."""
with engine_postgis.begin() as con:
@@ -390,6 +417,7 @@ class TestIO:
df = read_postgis(sql, con, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_fail_when_table_exists(self, engine_postgis, df_nybb):
"""
Tests that uploading the same table raises error when: if_replace='fail'.
@@ -409,6 +437,7 @@ class TestIO:
else:
raise e
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_replace_when_table_exists(self, engine_postgis, df_nybb):
"""
Tests that replacing a table is possible when: if_replace='replace'.
@@ -426,6 +455,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_append_when_table_exists(self, engine_postgis, df_nybb):
"""
Tests that appending to existing table produces correct results when:
@@ -445,15 +475,18 @@ class TestIO:
# There should be twice as many rows in the new table
assert new_rows == orig_rows * 2, (
"There should be {target} rows,"
"found: {current}".format(target=orig_rows * 2, current=new_rows),
"There should be {target} rows,found: {current}".format(
target=orig_rows * 2, current=new_rows
),
)
# Number of columns should stay the same
assert new_cols == orig_cols, (
"There should be {target} columns,"
"found: {current}".format(target=orig_cols, current=new_cols),
"There should be {target} columns,found: {current}".format(
target=orig_cols, current=new_cols
),
)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_without_crs(self, engine_postgis, df_nybb):
"""
Tests that GeoDataFrame can be written to PostGIS without CRS information.
@@ -463,8 +496,7 @@ class TestIO:
table = "nybb"
# Write to db
df_nybb = df_nybb
df_nybb.crs = None
df_nybb.geometry.array.crs = None
with pytest.warns(UserWarning, match="Could not parse CRS from the GeoDataF"):
write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
# Validate that srid is -1
@@ -477,6 +509,7 @@ class TestIO:
target_srid = conn.execute(sql).fetchone()[0]
assert target_srid == 0, "SRID should be 0, found %s" % target_srid
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_with_esri_authority(self, engine_postgis, df_nybb):
"""
Tests that GeoDataFrame can be written to PostGIS with ESRI Authority
@@ -499,6 +532,7 @@ class TestIO:
target_srid = conn.execute(sql).fetchone()[0]
assert target_srid == 102003, "SRID should be 102003, found %s" % target_srid
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_geometry_collection(
self, engine_postgis, df_geom_collection
):
@@ -525,6 +559,7 @@ class TestIO:
assert geom_type.upper() == "GEOMETRYCOLLECTION"
assert df.geom_type.unique()[0] == "GeometryCollection"
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_mixed_geometry_types(
self, engine_postgis, df_mixed_single_and_multi
):
@@ -551,6 +586,7 @@ class TestIO:
assert res[1][0].upper() == "MULTILINESTRING"
assert res[2][0].upper() == "POINT"
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_linear_ring(self, engine_postgis, df_linear_ring):
"""
Tests that writing a LinearRing.
@@ -572,6 +608,7 @@ class TestIO:
assert geom_type.upper() == "LINESTRING"
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_in_chunks(self, engine_postgis, df_mixed_single_and_multi):
"""
Tests writing a LinearRing works.
@@ -605,6 +642,7 @@ class TestIO:
assert res[1][0].upper() == "MULTILINESTRING"
assert res[2][0].upper() == "POINT"
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_to_different_schema(self, engine_postgis, df_nybb):
"""
Tests writing data to alternative schema.
@@ -628,6 +666,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_to_different_schema_when_table_exists(
self, engine_postgis, df_nybb
):
@@ -672,6 +711,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_3D_geometries(self, engine_postgis, df_3D_geoms):
"""
Tests writing a geometries with 3 dimensions works.
@@ -687,6 +727,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
assert list(df.geometry.has_z) == [True, True, True]
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_row_order(self, engine_postgis, df_nybb):
"""
Tests that the row order in db table follows the order of the original frame.
@@ -703,6 +744,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
assert df["BoroCode"].tolist() == correct_order
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_append_before_table_exists(self, engine_postgis, df_nybb):
"""
Tests that insert works with if_exists='append' when table does not exist yet.
@@ -720,6 +762,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_append_with_different_crs(self, engine_postgis, df_nybb):
"""
Tests that the warning is raised if table CRS differs from frame.
@@ -736,9 +779,26 @@ class TestIO:
with pytest.raises(ValueError, match="CRS of the target table"):
write_postgis(df_nybb2, con=engine, name=table, if_exists="append")
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_append_without_crs(self, engine_postgis, df_nybb):
# This test was included in #3328 when the default value for no
# CRS was changed from an SRID of -1 to 0. This resolves issues
# of appending dataframes to postgis that have no CRS as postgis
# no CRS value is 0.
engine = engine_postgis
df_nybb = df_nybb.set_crs(None, allow_override=True)
table = "nybb"
write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
# append another dataframe with no crs
df_nybb2 = df_nybb
write_postgis(df_nybb2, con=engine, name=table, if_exists="append")
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
@pytest.mark.xfail(
compat.PANDAS_GE_20 and not compat.PANDAS_GE_21,
reason="Duplicate columns are dropped in read_sql with pandas 2.0.x",
compat.PANDAS_GE_20 and not compat.PANDAS_GE_202,
reason="Duplicate columns are dropped in read_sql with pandas 2.0.0 and 2.0.1",
)
def test_duplicate_geometry_column_fails(self, engine_postgis):
"""
@@ -750,3 +810,69 @@ class TestIO:
with pytest.raises(ValueError):
read_postgis(sql, engine, geom_col="geom")
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_non_epsg_crs(self, connection_postgis, df_nybb):
con = connection_postgis
df_nybb = df_nybb.to_crs(crs="esri:54052")
create_postgis(con, df_nybb, srid=54052)
sql = "SELECT * FROM nybb;"
df = read_postgis(sql, con)
validate_boro_df(df)
assert df.crs == "ESRI:54052"
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
@mock.patch("shapely.get_srid")
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_srid_not_in_table(self, mock_get_srid, connection_postgis, df_nybb):
# mock a non-existent srid for edge case if shapely has an srid
# not present in postgis table.
pyproj = pytest.importorskip("pyproj")
mock_get_srid.return_value = 99999
con = connection_postgis
df_nybb = df_nybb.to_crs(crs="epsg:4326")
create_postgis(con, df_nybb)
sql = "SELECT * FROM nybb;"
with pytest.raises(pyproj.exceptions.CRSError, match="crs not found"):
with pytest.warns(UserWarning, match="Could not find srid 99999"):
read_postgis(sql, con)
@mock.patch("geopandas.io.sql._get_spatial_ref_sys_df")
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_no_spatial_ref_sys_table_in_postgis(
self, mock_get_spatial_ref_sys_df, connection_postgis, df_nybb
):
# mock for a non-existent spatial_ref_sys database
mock_get_spatial_ref_sys_df.side_effect = pd.errors.DatabaseError
con = connection_postgis
df_nybb = df_nybb.to_crs(crs="epsg:4326")
create_postgis(con, df_nybb, srid=4326)
sql = "SELECT * FROM nybb;"
with pytest.warns(
UserWarning, match="Could not find the spatial reference system table"
):
df = read_postgis(sql, con)
assert df.crs == "EPSG:4326"
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_non_epsg_crs_chunksize(self, connection_postgis, df_nybb):
"""Test chunksize argument with non epsg crs"""
chunksize = 2
con = connection_postgis
df_nybb = df_nybb.to_crs(crs="esri:54052")
create_postgis(con, df_nybb, srid=54052)
sql = "SELECT * FROM nybb;"
df = pd.concat(read_postgis(sql, con, chunksize=chunksize))
validate_boro_df(df)
assert df.crs == "ESRI:54052"