library packages

This commit is contained in:
2024-09-28 22:56:00 -07:00
parent 64d9b78b3a
commit 1973934e95
4893 changed files with 1184173 additions and 31 deletions

View File

@@ -0,0 +1,100 @@
"""
Script to create the data and write legacy storage (pickle) files.
Based on pandas' generate_legacy_storage_files.py script.
To use this script, create an environment for which you want to
generate pickles, activate the environment, and run this script as:
$ python geopandas/geopandas/io/tests/generate_legacy_storage_files.py \
geopandas/geopandas/io/tests/data/pickle/ pickle
This script generates a storage file for the current arch, system,
and Python version.
The idea here is you are using the *current* version of the
generate_legacy_storage_files with an *older* version of geopandas to
generate a pickle file. We will then check this file into a current
branch, and test using test_pickle.py. This will load the *older*
pickles and test versus the current data that is generated
(with master). These are then compared.
"""
import os
import pickle
import platform
import sys
import pandas as pd
from shapely.geometry import Point
import geopandas
def create_pickle_data():
    """Build the GeoDataFrames that are stored in the legacy pickle file.

    Returns a dict with two entries: ``"gdf_the_geom"``, a frame whose
    geometry lives in a column named ``the_geom``, and ``"gdf_crs"``, a
    frame created with ``crs="EPSG:4326"``.
    """
    frames = {}

    # geometry stored under a non-default column name
    frames["gdf_the_geom"] = geopandas.GeoDataFrame(
        {"a": [1, 2, 3], "the_geom": [Point(1, 1), Point(2, 2), Point(3, 3)]},
        geometry="the_geom",
    )

    # frame carrying an explicit CRS
    frames["gdf_crs"] = geopandas.GeoDataFrame(
        {"a": [0.1, 0.2, 0.3], "geometry": [Point(1, 1), Point(2, 2), Point(3, 3)]},
        crs="EPSG:4326",
    )

    return frames
def platform_name():
    """Return an underscore-joined tag describing the current environment.

    The tag is made of the geopandas version, the pandas version prefixed
    with ``pd-``, the Python version prefixed with ``py-``, the machine
    type and the lower-cased system name, in that order.
    """
    parts = (
        str(geopandas.__version__),
        "pd-" + str(pd.__version__),
        "py-" + str(platform.python_version()),
        str(platform.machine()),
        str(platform.system().lower()),
    )
    return "_".join(parts)
def write_legacy_pickles(output_dir):
    """Pickle the reference data into ``output_dir``.

    The file is named ``<platform_name()>.pickle`` and holds the dict
    returned by ``create_pickle_data()``, dumped with
    ``pickle.DEFAULT_PROTOCOL``. Progress information is printed to stdout.

    Parameters
    ----------
    output_dir : str
        Directory in which the pickle file is created.
    """
    print(
        "This script generates a storage file for the current arch, system, "
        "and python version"
    )
    # ``.format`` must be applied to the string, not to the ``None`` that
    # ``print`` returns.
    print("geopandas version: {}".format(geopandas.__version__))
    print(" output dir : {}".format(output_dir))
    print(" storage format: pickle")

    pth = "{}.pickle".format(platform_name())

    # The context manager closes the file even when pickling fails.
    with open(os.path.join(output_dir, pth), "wb") as fh:
        pickle.dump(create_pickle_data(), fh, pickle.DEFAULT_PROTOCOL)

    print("created pickle file: {}".format(pth))
def main():
    """Command-line entry point.

    Expects exactly two arguments in ``sys.argv``: the output directory and
    the storage type. Exits with a usage message when the argument count is
    wrong, and with an error message when the storage type is anything
    other than ``"pickle"``.
    """
    argv = sys.argv
    if len(argv) != 3:
        sys.exit(
            "Specify output directory and storage type: generate_legacy_"
            "storage_files.py <output_dir> <storage_type> "
        )

    output_dir, storage_type = str(argv[1]), str(argv[2])

    # guard clause: only the pickle format is supported
    if storage_type != "pickle":
        sys.exit("storage_type must be one of {'pickle'}")

    write_legacy_pickles(output_dir=output_dir)


if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,328 @@
import os
from shapely.geometry import (
LineString,
MultiLineString,
MultiPoint,
MultiPolygon,
Point,
Polygon,
)
import geopandas
from geopandas import GeoDataFrame
from .test_file import FIONA_MARK, PYOGRIO_MARK
import pytest
from geopandas.testing import assert_geodataframe_equal
# Credit: Polygons below come from Montreal city Open Data portal
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
# Polygon with an explicitly closed ring (first and last coordinates match).
city_hall_boundaries = Polygon(
(
(-73.5541107525234, 45.5091983609661),
(-73.5546126200639, 45.5086813829106),
(-73.5540185061397, 45.5084409343852),
(-73.5539986525799, 45.5084323044531),
(-73.5535801792994, 45.5089539203786),
(-73.5541107525234, 45.5091983609661),
)
)
# Second polygon; it shares two vertices with city_hall_boundaries.
vauquelin_place = Polygon(
(
(-73.5542465586147, 45.5081555487952),
(-73.5540185061397, 45.5084409343852),
(-73.5546126200639, 45.5086813829106),
(-73.5548825850032, 45.5084033554357),
(-73.5542465586147, 45.5081555487952),
)
)
# Two LineStrings built from the vertices of city_hall_boundaries.
city_hall_walls = [
LineString(
(
(-73.5541107525234, 45.5091983609661),
(-73.5546126200639, 45.5086813829106),
(-73.5540185061397, 45.5084409343852),
)
),
LineString(
(
(-73.5539986525799, 45.5084323044531),
(-73.5535801792994, 45.5089539203786),
(-73.5541107525234, 45.5091983609661),
)
),
]
# 2D points used for the Point / MultiPoint test frames.
city_hall_entrance = Point(-73.553785, 45.508722)
city_hall_balcony = Point(-73.554138, 45.509080)
city_hall_council_chamber = Point(-73.554246, 45.508931)
# Same x/y as city_hall_entrance, with a third (z) coordinate of 300.
point_3D = Point(-73.553785, 45.508722, 300)
# *****************************************
# TEST TOOLING
class _ExpectedError:
    """Exception type and message pattern expected from a write attempt."""

    def __init__(self, error_type, error_message_match):
        # exposed under the short attribute names ``type`` and ``match``
        self.type, self.match = error_type, error_message_match
class _ExpectedErrorBuilder:
    """Helper that records the error expected for one composite key."""

    def __init__(self, composite_key):
        self.composite_key = composite_key

    def to_raise(self, error_type, error_match):
        """Store an ``_ExpectedError`` for this key in ``_expected_exceptions``."""
        expectation = _ExpectedError(error_type, error_match)
        _expected_exceptions[self.composite_key] = expectation
def _expect_writing(gdf, ogr_driver):
    """Return a builder used to declare the error for ``gdf`` and ``ogr_driver``."""
    key = _composite_key(gdf, ogr_driver)
    return _ExpectedErrorBuilder(key)
def _composite_key(gdf, ogr_driver):
    """Return a hashable key built from the identity of ``gdf`` and the driver.

    The frame is identified by ``id(gdf)``, not by its content.
    """
    return frozenset((id(gdf), ogr_driver))
def _expected_error_on(gdf, ogr_driver):
    """Return the error registered for ``gdf`` and ``ogr_driver``, or ``None``."""
    return _expected_exceptions.get(_composite_key(gdf, ogr_driver))
# *****************************************
# TEST CASES
# Frames to round-trip; this list is the parameter set of the
# ``geodataframe`` fixture.
_geodataframes_to_write = []
# Maps a composite (frame identity, driver) key to the error expected when
# writing that frame with that driver. Filled through ``_expect_writing``.
_expected_exceptions = {}
# CRS shared by every test frame.
_CRS = "epsg:4326"
# NOTE: expected errors are keyed by ``id(gdf)``, so each ``_expect_writing``
# call below has to come before ``gdf`` is rebound to the next frame.
# ------------------
# gdf with Points
gdf = GeoDataFrame(
{"a": [1, 2]}, crs=_CRS, geometry=[city_hall_entrance, city_hall_balcony]
)
_geodataframes_to_write.append(gdf)
# ------------------
# gdf with MultiPoints
gdf = GeoDataFrame(
{"a": [1, 2]},
crs=_CRS,
geometry=[
MultiPoint([city_hall_balcony, city_hall_council_chamber]),
MultiPoint([city_hall_entrance, city_hall_balcony, city_hall_council_chamber]),
],
)
_geodataframes_to_write.append(gdf)
# ------------------
# gdf with Points and MultiPoints
gdf = GeoDataFrame(
{"a": [1, 2]},
crs=_CRS,
geometry=[MultiPoint([city_hall_entrance, city_hall_balcony]), city_hall_balcony],
)
_geodataframes_to_write.append(gdf)
# 'ESRI Shapefile' driver supports writing LineString/MultiLinestring and
# Polygon/MultiPolygon but does not mention Point/MultiPoint
# see https://www.gdal.org/drv_shapefile.html
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")
# ------------------
# gdf with LineStrings
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=city_hall_walls)
_geodataframes_to_write.append(gdf)
# ------------------
# gdf with MultiLineStrings
gdf = GeoDataFrame(
{"a": [1, 2]},
crs=_CRS,
geometry=[MultiLineString(city_hall_walls), MultiLineString(city_hall_walls)],
)
_geodataframes_to_write.append(gdf)
# ------------------
# gdf with LineStrings and MultiLineStrings
gdf = GeoDataFrame(
{"a": [1, 2]},
crs=_CRS,
geometry=[MultiLineString(city_hall_walls), city_hall_walls[0]],
)
_geodataframes_to_write.append(gdf)
# ------------------
# gdf with Polygons
gdf = GeoDataFrame(
{"a": [1, 2]}, crs=_CRS, geometry=[city_hall_boundaries, vauquelin_place]
)
_geodataframes_to_write.append(gdf)
# ------------------
# gdf with MultiPolygon
gdf = GeoDataFrame(
{"a": [1]},
crs=_CRS,
geometry=[MultiPolygon((city_hall_boundaries, vauquelin_place))],
)
_geodataframes_to_write.append(gdf)
# ------------------
# gdf with Polygon and MultiPolygon
gdf = GeoDataFrame(
{"a": [1, 2]},
crs=_CRS,
geometry=[
MultiPolygon((city_hall_boundaries, vauquelin_place)),
city_hall_boundaries,
],
)
_geodataframes_to_write.append(gdf)
# ------------------
# gdf with null geometry and Point
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, city_hall_entrance])
_geodataframes_to_write.append(gdf)
# ------------------
# gdf with null geometry and 3D Point
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, point_3D])
_geodataframes_to_write.append(gdf)
# ------------------
# gdf with null geometries only
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, None])
_geodataframes_to_write.append(gdf)
# ------------------
# gdf with all shape types mixed together
gdf = GeoDataFrame(
{"a": [1, 2, 3, 4, 5, 6]},
crs=_CRS,
geometry=[
MultiPolygon((city_hall_boundaries, vauquelin_place)),
city_hall_entrance,
MultiLineString(city_hall_walls),
city_hall_walls[0],
MultiPoint([city_hall_entrance, city_hall_balcony]),
city_hall_balcony,
],
)
_geodataframes_to_write.append(gdf)
# Not supported by 'ESRI Shapefile' driver
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")
# ------------------
# gdf with all 2D shape types and 3D Point mixed together
gdf = GeoDataFrame(
{"a": [1, 2, 3, 4, 5, 6, 7]},
crs=_CRS,
geometry=[
MultiPolygon((city_hall_boundaries, vauquelin_place)),
city_hall_entrance,
MultiLineString(city_hall_walls),
city_hall_walls[0],
MultiPoint([city_hall_entrance, city_hall_balcony]),
city_hall_balcony,
point_3D,
],
)
_geodataframes_to_write.append(gdf)
# Not supported by 'ESRI Shapefile' driver
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")
@pytest.fixture(params=_geodataframes_to_write)
def geodataframe(request):
    """Provide one frame from ``_geodataframes_to_write`` per test run."""
    frame = request.param
    return frame
@pytest.fixture(
    params=[
        ("GeoJSON", ".geojson"),
        ("ESRI Shapefile", ".shp"),
        ("GPKG", ".gpkg"),
        ("SQLite", ".sqlite"),
    ]
)
def ogr_driver(request):
    """Provide a ``(driver name, file extension)`` pair per test run."""
    driver_and_extension = request.param
    return driver_and_extension
@pytest.fixture(
    params=[
        pytest.param("fiona", marks=FIONA_MARK),
        pytest.param("pyogrio", marks=PYOGRIO_MARK),
    ]
)
def engine(request):
    """Provide the I/O engine name, ``"fiona"`` or ``"pyogrio"``.

    Each parameter carries the matching mark imported from ``test_file``.
    """
    engine_name = request.param
    return engine_name
def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver, engine):
    """Write a frame with the given driver/engine and read it back unchanged.

    When an error was registered for the (frame, driver) combination, the
    write is expected to raise instead and no read-back is attempted.
    """
    driver, ext = ogr_driver
    output_file = os.path.join(str(tmpdir), "output_file" + ext)
    write_kwargs = {}

    if driver == "SQLite":
        write_kwargs["spatialite"] = True

        # This if statement can be removed once minimal fiona version >= 1.8.20
        if engine == "fiona":
            from packaging.version import Version

            import fiona

            if Version(fiona.__version__) < Version("1.8.20"):
                pytest.skip("SQLite driver only available from version 1.8.20")

        # If only 3D Points, geometry_type needs to be specified for spatialite at the
        # moment. This if can be removed once the following PR is released:
        # https://github.com/geopandas/pyogrio/pull/223
        if (
            engine == "pyogrio"
            # compare the row count; ``len(geodataframe == 2)`` would measure
            # an element-wise comparison result and always be truthy here
            and len(geodataframe) == 2
            and geodataframe.geometry[0] is None
            and geodataframe.geometry[1] is not None
            and geodataframe.geometry[1].has_z
        ):
            write_kwargs["geometry_type"] = "Point Z"

    expected_error = _expected_error_on(geodataframe, driver)
    if expected_error:
        # The pattern covers two message variants so the same check can be
        # used for both engines.
        with pytest.raises(
            RuntimeError, match="Failed to write record|Could not add feature to layer"
        ):
            geodataframe.to_file(
                output_file, driver=driver, engine=engine, **write_kwargs
            )
    else:
        if driver == "SQLite" and engine == "pyogrio":
            try:
                geodataframe.to_file(
                    output_file, driver=driver, engine=engine, **write_kwargs
                )
            except ValueError as e:
                if "unrecognized option 'SPATIALITE'" in str(e):
                    pytest.xfail(
                        "pyogrio wheels from PyPI do not come with SpatiaLite support. "
                        f"Error: {e}"
                    )
                raise
        else:
            geodataframe.to_file(
                output_file, driver=driver, engine=engine, **write_kwargs
            )

        reloaded = geopandas.read_file(output_file, engine=engine)

        if driver == "GeoJSON" and engine == "pyogrio":
            # For GeoJSON files, the int64 column comes back as int32
            reloaded["a"] = reloaded["a"].astype("int64")

        assert_geodataframe_equal(geodataframe, reloaded, check_column_type="equiv")

View File

@@ -0,0 +1,537 @@
import contextlib
import json
import os
import pathlib
from packaging.version import Version
import numpy as np
import shapely
from shapely import MultiPoint, Point, box
from geopandas import GeoDataFrame, GeoSeries
import pytest
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
pytest.importorskip("pyarrow")
import pyarrow as pa
import pyarrow.compute as pc
from pyarrow import feather
# "data" directory located next to this test module.
DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"
def pa_table(table):
    """Convert the object returned by ``to_arrow`` into a ``pyarrow.Table``.

    With pyarrow >= 14.0.0 the object is passed to ``pa.table``; on older
    versions its private ``_pa_table`` attribute is returned instead.
    """
    if Version(pa.__version__) >= Version("14.0.0"):
        return pa.table(table)
    return table._pa_table
def pa_array(array):
    """Convert the object returned by ``to_arrow`` into a ``pyarrow.Array``.

    With pyarrow >= 14.0.0 the object is passed to ``pa.array``; on older
    versions its private ``_pa_array`` attribute is returned instead.
    """
    if Version(pa.__version__) >= Version("14.0.0"):
        return pa.array(array)
    return array._pa_array
def _fill_point_nans(geometry):
    """Zero out NaN coordinates in the first chunk of a point geometry column.

    ``geometry`` must have a fixed-size-list or struct type. Returns
    ``(masks, filled)``: the NaN mask of every coordinate array and the
    geometry array rebuilt with NaNs replaced by ``0.0``.
    """
    geom_type = geometry.type
    chunk = geometry.chunk(0)
    interleaved = pa.types.is_fixed_size_list(geom_type)
    if interleaved:
        coords = [chunk.values]
    else:
        coords = [chunk.field(i) for i in range(geom_type.num_fields)]

    masks = [pc.is_nan(values) for values in coords]
    filled = [
        pc.replace_with_mask(values, mask, 0.0) for values, mask in zip(coords, masks)
    ]

    if interleaved:
        return masks, pa.FixedSizeListArray.from_arrays(filled[0], type=geom_type)
    return masks, pa.StructArray.from_arrays(filled, fields=list(geom_type))


def assert_table_equal(left, right, check_metadata=True):
    """Assert that two pyarrow tables are equal, with a readable failure.

    Parameters
    ----------
    left, right : pyarrow.Table
        Tables to compare; ``left`` must contain a ``"geometry"`` column.
    check_metadata : bool, default True
        Whether schema and field metadata take part in the comparison.

    Raises
    ------
    AssertionError
        If the tables differ; the message names the schema, the metadata or
        the first differing column where one can be identified.
    """
    geom_type = left["geometry"].type
    # in case of Points (directly the inner fixed_size_list or struct type)
    # -> there are NaNs for empties -> we need to compare them separately
    # and then fill, because pyarrow.Table.equals considers NaNs as not equal
    is_point_type = pa.types.is_fixed_size_list(geom_type) or pa.types.is_struct(
        geom_type
    )
    # Only fill when both sides share the geometry type; otherwise fall
    # through to the schema comparison below, which reports the mismatch.
    if is_point_type and right["geometry"].type == geom_type:
        left_masks, left_geoms = _fill_point_nans(left["geometry"])
        right_masks, right_geoms = _fill_point_nans(right["geometry"])
        for left_mask, right_mask in zip(left_masks, right_masks):
            assert left_mask.equals(right_mask)

        # Look up the column position instead of assuming that the geometry
        # is the second column.
        left = left.set_column(
            left.schema.get_field_index("geometry"),
            left.schema.field("geometry"),
            left_geoms,
        )
        right = right.set_column(
            right.schema.get_field_index("geometry"),
            right.schema.field("geometry"),
            right_geoms,
        )

    if left.equals(right, check_metadata=check_metadata):
        return

    # From here on the tables differ; find the most specific explanation.
    if not left.schema.equals(right.schema):
        raise AssertionError(
            "Schema not equal\nLeft:\n{0}\nRight:\n{1}".format(
                left.schema, right.schema
            )
        )

    if check_metadata:
        if not left.schema.equals(right.schema, check_metadata=True):
            if not left.schema.metadata == right.schema.metadata:
                raise AssertionError(
                    "Metadata not equal\nLeft:\n{0}\nRight:\n{1}".format(
                        left.schema.metadata, right.schema.metadata
                    )
                )
        for col in left.schema.names:
            assert left.schema.field(col).equals(
                right.schema.field(col), check_metadata=True
            )

    for col in left.column_names:
        a_left = pa.concat_arrays(left.column(col).chunks)
        a_right = pa.concat_arrays(right.column(col).chunks)
        if not a_left.equals(a_right):
            raise AssertionError(
                "Column '{0}' not equal:\n{1}".format(col, a_left.diff(a_right))
            )

    raise AssertionError("Tables not equal for unknown reason")
@pytest.mark.skipif(
    shapely.geos_version < (3, 9, 0),
    reason="Checking for empty is buggy with GEOS<3.9",
)  # an old GEOS is installed in the CI builds with the defaults channel
@pytest.mark.parametrize(
    "dim",
    [
        "xy",
        pytest.param(
            "xyz",
            marks=pytest.mark.skipif(
                shapely.geos_version < (3, 10, 0),
                reason="Cannot write 3D geometries with GEOS<3.10",
            ),
        ),
    ],
)
@pytest.mark.parametrize(
    "geometry_type",
    ["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
)
@pytest.mark.parametrize(
    "geometry_encoding, interleaved",
    [("WKB", None), ("geoarrow", True), ("geoarrow", False)],
    ids=["WKB", "geoarrow-interleaved", "geoarrow-separated"],
)
def test_geoarrow_export(geometry_type, dim, geometry_encoding, interleaved):
    """Compare ``to_arrow`` output with the reference files in ``data/geoarrow``."""
    base_path = DATA_PATH / "geoarrow"
    suffix = geometry_type + ("_z" if dim == "xyz" else "")
    is_wkb = geometry_encoding == "WKB"

    # Input frame, built from the WKB example file
    df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
    df["row_number"] = df["row_number"].astype("int32")
    df = GeoDataFrame(df)
    df.geometry.array.crs = None

    # Reference table for the requested encoding
    if is_wkb:
        filename = f"example-{suffix}-wkb.arrow"
    else:
        filename = f"example-{suffix}{'-interleaved' if interleaved else ''}.arrow"
    expected = feather.read_table(base_path / filename)

    # GeoDataFrame -> Arrow Table, with the "pandas" metadata removed
    exported = df.to_arrow(geometry_encoding=geometry_encoding, interleaved=interleaved)
    result = pa_table(exported).replace_schema_metadata(None)

    mask_nonempty = None
    if is_wkb and dim == "xyz" and geometry_type.startswith("multi"):
        # for collections with z dimension, drop the empties because those don't
        # roundtrip correctly to WKB
        # (https://github.com/libgeos/geos/issues/888)
        mask_nonempty = pa.array(np.asarray(~df.geometry.is_empty))
        result = result.filter(mask_nonempty)
        expected = expected.filter(mask_nonempty)

    assert_table_equal(result, expected)

    # GeoSeries -> Arrow array
    if not is_wkb and geometry_type == "point":
        # for points, we again have to handle NaNs separately, we already did that
        # for table so let's just skip this part
        return

    result_arr = pa_array(
        df.geometry.to_arrow(
            geometry_encoding=geometry_encoding, interleaved=interleaved
        )
    )
    if mask_nonempty is not None:
        result_arr = result_arr.filter(mask_nonempty)
    assert result_arr.equals(expected["geometry"].chunk(0))
@pytest.mark.skipif(
    Version(shapely.__version__) < Version("2.0.2"),
    reason="from_ragged_array failing with read-only array input",
)
@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
def test_geoarrow_multiple_geometry_crs(encoding):
    """Check that every geometry column keeps its own CRS through Arrow."""
    pytest.importorskip("pyproj")
    # ensure each geometry column has its own crs
    gdf = GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="epsg:4326")
    gdf["geom2"] = gdf.geometry.to_crs("epsg:3857")

    result = pa_table(gdf.to_arrow(geometry_encoding=encoding))
    meta1 = json.loads(
        result.schema.field("geometry").metadata[b"ARROW:extension:metadata"]
    )
    assert json.loads(meta1["crs"])["id"]["code"] == 4326
    meta2 = json.loads(
        result.schema.field("geom2").metadata[b"ARROW:extension:metadata"]
    )
    assert json.loads(meta2["crs"])["id"]["code"] == 3857

    roundtripped = GeoDataFrame.from_arrow(result)
    assert_geodataframe_equal(gdf, roundtripped)
    # the input frame is left untouched by the export
    assert gdf.geometry.crs == "epsg:4326"
    assert gdf.geom2.crs == "epsg:3857"
    # ... and the imported frame carries a separate CRS per geometry column
    assert roundtripped.geometry.crs == "epsg:4326"
    assert roundtripped.geom2.crs == "epsg:3857"
@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
def test_geoarrow_series_name_crs(encoding):
    """Check name, extension name and CRS of the field exported by a GeoSeries."""
    pytest.importorskip("pyproj")
    pytest.importorskip("pyarrow", minversion="14.0.0")

    gser = GeoSeries([box(0, 0, 10, 10)], crs="epsg:4326", name="geom")
    schema_capsule, _ = gser.to_arrow(geometry_encoding=encoding).__arrow_c_array__()
    field = pa.Field._import_from_c_capsule(schema_capsule)
    assert field.name == "geom"

    # Pick the expected name first: a conditional expression binds looser
    # than ``==``, so writing it inline would assert a bare bytes literal in
    # the geoarrow case and never fail.
    expected_name = b"geoarrow.wkb" if encoding == "WKB" else b"geoarrow.polygon"
    assert field.metadata[b"ARROW:extension:name"] == expected_name

    meta = json.loads(field.metadata[b"ARROW:extension:metadata"])
    assert json.loads(meta["crs"])["id"]["code"] == 4326

    # ensure it also works without a name
    gser = GeoSeries([box(0, 0, 10, 10)])
    schema_capsule, _ = gser.to_arrow(geometry_encoding=encoding).__arrow_c_array__()
    field = pa.Field._import_from_c_capsule(schema_capsule)
    assert field.name == ""
def test_geoarrow_unsupported_encoding():
    """An unknown ``geometry_encoding`` raises for both frame and series."""
    gdf = GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="epsg:4326")

    # same call on the frame first, then on its geometry column
    for exporter in (gdf, gdf.geometry):
        with pytest.raises(ValueError, match="Expected geometry encoding"):
            exporter.to_arrow(geometry_encoding="invalid")
def test_geoarrow_mixed_geometry_types():
    """Point + polygon is rejected; point + multipoint exports as multipoint."""
    point_and_polygon = GeoDataFrame(
        {"geometry": [Point(0, 0), box(0, 0, 10, 10)]},
        crs="epsg:4326",
    )
    with pytest.raises(ValueError, match="Geometry type combination is not supported"):
        point_and_polygon.to_arrow(geometry_encoding="geoarrow")

    point_and_multipoint = GeoDataFrame(
        {"geometry": [Point(0, 0), MultiPoint([(0, 0), (1, 1)])]},
        crs="epsg:4326",
    )
    result = pa_table(point_and_multipoint.to_arrow(geometry_encoding="geoarrow"))
    extension_name = result.schema.field("geometry").metadata[b"ARROW:extension:name"]
    assert extension_name == b"geoarrow.multipoint"
@pytest.mark.parametrize("geom_type", ["point", "polygon"])
@pytest.mark.parametrize(
    "encoding, interleaved", [("WKB", True), ("geoarrow", True), ("geoarrow", False)]
)
def test_geoarrow_missing(encoding, interleaved, geom_type):
    """A missing geometry is exported as an Arrow null."""
    # dummy test for single geometry type until missing values are included
    # in the test data for test_geoarrow_export
    first_geom = Point(0, 0) if geom_type == "point" else box(0, 0, 10, 10)
    gdf = GeoDataFrame(geometry=[first_geom, None], crs="epsg:4326")

    expect_failure = (
        encoding == "geoarrow"
        and geom_type == "point"
        and interleaved
        and Version(pa.__version__) < Version("15.0.0")
    )
    if expect_failure:
        with pytest.raises(
            ValueError,
            match="Converting point geometries with missing values is not supported",
        ):
            gdf.to_arrow(geometry_encoding=encoding, interleaved=interleaved)
        return

    result = pa_table(gdf.to_arrow(geometry_encoding=encoding, interleaved=interleaved))
    geometry = result["geometry"]
    assert geometry.null_count == 1
    assert geometry.is_null().to_pylist() == [False, True]
def test_geoarrow_include_z():
    """``include_z`` overrides the dimensionality chosen by default."""
    # 2D input: "xy" by default, "xyz" with NaN z values when forced
    gdf = GeoDataFrame({"geometry": [Point(0, 0), Point(1, 1), Point()]})

    table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow"))
    assert table["geometry"].type.value_field.name == "xy"
    assert table["geometry"].type.list_size == 2

    table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow", include_z=True))
    assert table["geometry"].type.value_field.name == "xyz"
    assert table["geometry"].type.list_size == 3
    z_values = table["geometry"].chunk(0).values.to_numpy()[2::3]
    assert np.isnan(z_values).all()

    # 3D input: "xyz" by default, "xy" when z is dropped
    gdf = GeoDataFrame({"geometry": [Point(0, 0, 0), Point(1, 1, 1), Point()]})

    table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow"))
    assert table["geometry"].type.value_field.name == "xyz"
    assert table["geometry"].type.list_size == 3

    table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow", include_z=False))
    assert table["geometry"].type.value_field.name == "xy"
    assert table["geometry"].type.list_size == 2
@contextlib.contextmanager
def with_geoarrow_extension_types():
    """Register the geoarrow-pyarrow extension types for the ``with`` body.

    The enclosing test is skipped when ``geoarrow.pyarrow`` cannot be
    imported. The types are unregistered again on exit, also on error.
    """
    geoarrow_pa = pytest.importorskip("geoarrow.pyarrow")
    geoarrow_pa.register_extension_types()
    try:
        yield
    finally:
        geoarrow_pa.unregister_extension_types()
@pytest.mark.parametrize("dim", ["xy", "xyz"])
@pytest.mark.parametrize(
    "geometry_type",
    ["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
)
def test_geoarrow_export_with_extension_types(geometry_type, dim):
    """Exported geometry columns are extension types once those are registered."""
    # ensure the exported data can be imported by geoarrow-pyarrow and are
    # recognized as extension types
    base_path = DATA_PATH / "geoarrow"
    suffix = geometry_type + ("_z" if dim == "xyz" else "")

    # Read the example data
    df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
    df["row_number"] = df["row_number"].astype("int32")
    df = GeoDataFrame(df)
    df.geometry.array.crs = None

    pytest.importorskip("geoarrow.pyarrow")

    export_options = (
        {"geometry_encoding": "WKB"},
        {"geometry_encoding": "geoarrow"},
        {"geometry_encoding": "geoarrow", "interleaved": False},
    )
    with with_geoarrow_extension_types():
        for options in export_options:
            result = pa_table(df.to_arrow(**options))
            assert isinstance(result["geometry"].type, pa.ExtensionType)
@pytest.mark.skipif(
    Version(shapely.__version__) < Version("2.0.2"),
    reason="from_ragged_array failing with read-only array input",
)
@pytest.mark.parametrize("dim", ["xy", "xyz"])
@pytest.mark.parametrize(
    "geometry_type",
    [
        "point",
        "linestring",
        "polygon",
        "multipoint",
        "multilinestring",
        "multipolygon",
    ],
)
def test_geoarrow_import(geometry_type, dim):
    """``from_arrow`` yields the same frame for all three example encodings."""
    base_path = DATA_PATH / "geoarrow"
    suffix = geometry_type + ("_z" if dim == "xyz" else "")

    # Read the example data
    df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
    df = GeoDataFrame(df)
    df.geometry.crs = None

    # WKB, interleaved geoarrow and separated geoarrow, in that order
    filenames = (
        f"example-{suffix}-wkb.arrow",
        f"example-{suffix}-interleaved.arrow",
        f"example-{suffix}.arrow",
    )
    for filename in filenames:
        table = feather.read_table(base_path / filename)
        result = GeoDataFrame.from_arrow(table)
        assert_geodataframe_equal(result, df)
@pytest.mark.skipif(
    Version(shapely.__version__) < Version("2.0.2"),
    reason="from_ragged_array failing with read-only array input",
)
@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
def test_geoarrow_import_geometry_column(encoding):
    """Check which column becomes the active geometry after ``from_arrow``."""
    pytest.importorskip("pyproj")
    gdf = GeoDataFrame(geometry=[box(0, 0, 10, 10)])
    gdf["centroid"] = gdf.geometry.centroid

    # both geometry columns present: "geometry" stays the active one
    full_table = pa_table(gdf.to_arrow(geometry_encoding=encoding))
    result = GeoDataFrame.from_arrow(full_table)
    assert_geodataframe_equal(result, gdf)
    assert result.active_geometry_name == "geometry"

    # only "centroid" present: it becomes the active geometry
    centroid_table = pa_table(gdf[["centroid"]].to_arrow(geometry_encoding=encoding))
    result = GeoDataFrame.from_arrow(centroid_table)
    assert result.active_geometry_name == "centroid"

    # explicit choice through the ``geometry`` keyword
    result = GeoDataFrame.from_arrow(
        pa_table(gdf.to_arrow(geometry_encoding=encoding)), geometry="centroid"
    )
    assert result.active_geometry_name == "centroid"
    assert_geodataframe_equal(result, gdf.set_geometry("centroid"))
def test_geoarrow_import_missing_geometry():
    """Importing Arrow data without a geometry field raises ``ValueError``."""
    pytest.importorskip("pyarrow", minversion="14.0.0")
    plain_table = pa.table({"a": [0, 1, 2], "b": [0.1, 0.2, 0.3]})

    with pytest.raises(ValueError, match="No geometry column found"):
        GeoDataFrame.from_arrow(plain_table)

    plain_array = plain_table["a"].chunk(0)
    with pytest.raises(ValueError, match="No GeoArrow geometry field found"):
        GeoSeries.from_arrow(plain_array)
def test_geoarrow_import_capsule_interface():
    """``from_arrow`` accepts the ``to_arrow`` result directly, without pyarrow."""
    # ensure we can import non-pyarrow object
    pytest.importorskip("pyarrow", minversion="14.0.0")
    gdf = GeoDataFrame({"col": [1]}, geometry=[box(0, 0, 10, 10)])
    exported = gdf.to_arrow()
    assert_geodataframe_equal(GeoDataFrame.from_arrow(exported), gdf)
@pytest.mark.parametrize("dim", ["xy", "xyz"])
@pytest.mark.parametrize(
    "geometry_type",
    ["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
)
def test_geoarrow_import_from_extension_types(geometry_type, dim):
    """Round-trip through Arrow while the geoarrow extension types are registered."""
    pytest.importorskip("pyproj")
    base_path = DATA_PATH / "geoarrow"
    suffix = geometry_type + ("_z" if dim == "xyz" else "")

    # Read the example data
    df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
    df = GeoDataFrame(df, crs="EPSG:3857")

    pytest.importorskip("geoarrow.pyarrow")

    export_options = (
        {"geometry_encoding": "WKB"},
        {"geometry_encoding": "geoarrow"},
        {"geometry_encoding": "geoarrow", "interleaved": False},
    )
    with with_geoarrow_extension_types():
        for options in export_options:
            result = GeoDataFrame.from_arrow(pa_table(df.to_arrow(**options)))
            assert_geodataframe_equal(result, df)
def test_geoarrow_import_geoseries():
    """``GeoSeries.from_arrow`` reads geoarrow-pyarrow arrays."""
    pytest.importorskip("pyproj")
    gp = pytest.importorskip("geoarrow.pyarrow")
    ser = GeoSeries.from_wkt(["POINT (1 1)", "POINT (2 2)"], crs="EPSG:3857")

    with with_geoarrow_extension_types():
        for encoding in ("WKB", "geoarrow"):
            arr = gp.array(ser.to_arrow(geometry_encoding=encoding))
            result = GeoSeries.from_arrow(arr)
            assert_geoseries_equal(result, ser)

        # the name is lost when going through a pyarrow.Array
        ser.name = "name"
        arr = gp.array(ser.to_arrow())
        result = GeoSeries.from_arrow(arr)
        assert result.name is None

        # we can specify the name as one of the kwargs
        result = GeoSeries.from_arrow(arr, name="test")
        assert_geoseries_equal(result, ser)
def test_geoarrow_import_unknown_geoarrow_type():
    """An unrecognised ``geoarrow.*`` extension name raises ``TypeError``."""
    gdf = GeoDataFrame({"col": [1]}, geometry=[box(0, 0, 10, 10)])
    table = pa_table(gdf.to_arrow())
    schema = table.schema

    # relabel the geometry field with an extension name that does not exist
    unknown_metadata = {
        b"ARROW:extension:name": b"geoarrow.unknown",
        b"ARROW:extension:metadata": b"{}",
    }
    new_field = schema.field("geometry").with_metadata(unknown_metadata)
    new_table = table.cast(pa.schema([schema.field(0), new_field]))

    with pytest.raises(TypeError, match="Unknown GeoArrow extension type"):
        GeoDataFrame.from_arrow(new_table)

View File

@@ -0,0 +1,306 @@
from collections import OrderedDict
import numpy as np
import pandas as pd
from shapely.geometry import (
LineString,
MultiLineString,
MultiPoint,
MultiPolygon,
Point,
Polygon,
)
from geopandas import GeoDataFrame
from geopandas.io.file import infer_schema
import pytest
# Credit: Polygons below come from Montreal city Open Data portal
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
# Polygon with an explicitly closed ring (first and last coordinates match).
city_hall_boundaries = Polygon(
(
(-73.5541107525234, 45.5091983609661),
(-73.5546126200639, 45.5086813829106),
(-73.5540185061397, 45.5084409343852),
(-73.5539986525799, 45.5084323044531),
(-73.5535801792994, 45.5089539203786),
(-73.5541107525234, 45.5091983609661),
)
)
# Second polygon; it shares two vertices with city_hall_boundaries.
vauquelin_place = Polygon(
(
(-73.5542465586147, 45.5081555487952),
(-73.5540185061397, 45.5084409343852),
(-73.5546126200639, 45.5086813829106),
(-73.5548825850032, 45.5084033554357),
(-73.5542465586147, 45.5081555487952),
)
)
# Two LineStrings built from the vertices of city_hall_boundaries.
city_hall_walls = [
LineString(
(
(-73.5541107525234, 45.5091983609661),
(-73.5546126200639, 45.5086813829106),
(-73.5540185061397, 45.5084409343852),
)
),
LineString(
(
(-73.5539986525799, 45.5084323044531),
(-73.5535801792994, 45.5089539203786),
(-73.5541107525234, 45.5091983609661),
)
),
]
# 2D points.
city_hall_entrance = Point(-73.553785, 45.508722)
city_hall_balcony = Point(-73.554138, 45.509080)
city_hall_council_chamber = Point(-73.554246, 45.508931)
# 3D geometries: every coordinate carries a z value of 300.
point_3D = Point(-73.553785, 45.508722, 300)
linestring_3D = LineString(
(
(-73.5541107525234, 45.5091983609661, 300),
(-73.5546126200639, 45.5086813829106, 300),
(-73.5540185061397, 45.5084409343852, 300),
)
)
polygon_3D = Polygon(
(
(-73.5541107525234, 45.5091983609661, 300),
(-73.5535801792994, 45.5089539203786, 300),
(-73.5541107525234, 45.5091983609661, 300),
)
)
def test_infer_schema_only_points():
    """Only points: the geometry type is the single string ``"Point"``."""
    frame = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
    expected = {"geometry": "Point", "properties": OrderedDict()}
    assert infer_schema(frame) == expected
def test_infer_schema_points_and_multipoints():
    """Points mixed with multipoints: both types are listed."""
    geometries = [
        MultiPoint([city_hall_entrance, city_hall_balcony]),
        city_hall_balcony,
    ]
    frame = GeoDataFrame(geometry=geometries)
    expected = {
        "geometry": ["MultiPoint", "Point"],
        "properties": OrderedDict(),
    }
    assert infer_schema(frame) == expected
def test_infer_schema_only_multipoints():
    """Only multipoints: the geometry type is ``"MultiPoint"``."""
    multipoint = MultiPoint(
        [city_hall_entrance, city_hall_balcony, city_hall_council_chamber]
    )
    frame = GeoDataFrame(geometry=[multipoint])
    expected = {"geometry": "MultiPoint", "properties": OrderedDict()}
    assert infer_schema(frame) == expected
def test_infer_schema_only_linestrings():
    """Only linestrings: the geometry type is ``"LineString"``."""
    frame = GeoDataFrame(geometry=city_hall_walls)
    expected = {"geometry": "LineString", "properties": OrderedDict()}
    assert infer_schema(frame) == expected
def test_infer_schema_linestrings_and_multilinestrings():
    """Linestrings mixed with multilinestrings: both types are listed."""
    geometries = [MultiLineString(city_hall_walls), city_hall_walls[0]]
    frame = GeoDataFrame(geometry=geometries)
    expected = {
        "geometry": ["MultiLineString", "LineString"],
        "properties": OrderedDict(),
    }
    assert infer_schema(frame) == expected
def test_infer_schema_only_multilinestrings():
    """Only multilinestrings: the geometry type is ``"MultiLineString"``."""
    frame = GeoDataFrame(geometry=[MultiLineString(city_hall_walls)])
    expected = {
        "geometry": "MultiLineString",
        "properties": OrderedDict(),
    }
    assert infer_schema(frame) == expected
def test_infer_schema_only_polygons():
    """Only polygons: the geometry type is ``"Polygon"``."""
    frame = GeoDataFrame(geometry=[city_hall_boundaries, vauquelin_place])
    expected = {"geometry": "Polygon", "properties": OrderedDict()}
    assert infer_schema(frame) == expected
def test_infer_schema_polygons_and_multipolygons():
    """Polygons mixed with multipolygons: both types are listed."""
    geometries = [
        MultiPolygon((city_hall_boundaries, vauquelin_place)),
        city_hall_boundaries,
    ]
    frame = GeoDataFrame(geometry=geometries)
    expected = {
        "geometry": ["MultiPolygon", "Polygon"],
        "properties": OrderedDict(),
    }
    assert infer_schema(frame) == expected
def test_infer_schema_only_multipolygons():
    """Only multipolygons: the geometry type is ``"MultiPolygon"``."""
    multipolygon = MultiPolygon((city_hall_boundaries, vauquelin_place))
    frame = GeoDataFrame(geometry=[multipolygon])
    expected = {"geometry": "MultiPolygon", "properties": OrderedDict()}
    assert infer_schema(frame) == expected
def test_infer_schema_multiple_shape_types():
    """All six 2D geometry types in one frame are all listed."""
    geometries = [
        MultiPolygon((city_hall_boundaries, vauquelin_place)),
        city_hall_boundaries,
        MultiLineString(city_hall_walls),
        city_hall_walls[0],
        MultiPoint([city_hall_entrance, city_hall_balcony]),
        city_hall_balcony,
    ]
    frame = GeoDataFrame(geometry=geometries)
    expected = {
        "geometry": [
            "MultiPolygon",
            "Polygon",
            "MultiLineString",
            "LineString",
            "MultiPoint",
            "Point",
        ],
        "properties": OrderedDict(),
    }
    assert infer_schema(frame) == expected
def test_infer_schema_mixed_3D_shape_type():
    """Adding a 3D point to the 2D mix puts "3D Point" first in the schema."""
    geometries = [
        MultiPolygon((city_hall_boundaries, vauquelin_place)),
        city_hall_boundaries,
        MultiLineString(city_hall_walls),
        city_hall_walls[0],
        MultiPoint([city_hall_entrance, city_hall_balcony]),
        city_hall_balcony,
        point_3D,
    ]
    frame = GeoDataFrame(geometry=geometries)
    expected_types = [
        "3D Point",
        "MultiPolygon",
        "Polygon",
        "MultiLineString",
        "LineString",
        "MultiPoint",
        "Point",
    ]
    expected = {"geometry": expected_types, "properties": OrderedDict()}
    assert infer_schema(frame) == expected
def test_infer_schema_mixed_3D_Point():
    """A 2D point plus a 3D point yields both "3D Point" and "Point"."""
    frame = GeoDataFrame(geometry=[city_hall_balcony, point_3D])
    expected = {"geometry": ["3D Point", "Point"], "properties": OrderedDict()}
    assert infer_schema(frame) == expected
def test_infer_schema_only_3D_Points():
    """A frame of only 3D points is inferred as the single type "3D Point"."""
    frame = GeoDataFrame(geometry=[point_3D, point_3D])
    expected = {"geometry": "3D Point", "properties": OrderedDict()}
    assert infer_schema(frame) == expected
def test_infer_schema_mixed_3D_linestring():
    """A 2D plus a 3D linestring yields "3D LineString" and "LineString"."""
    frame = GeoDataFrame(geometry=[city_hall_walls[0], linestring_3D])
    expected = {
        "geometry": ["3D LineString", "LineString"],
        "properties": OrderedDict(),
    }
    assert infer_schema(frame) == expected
def test_infer_schema_only_3D_linestrings():
    """A frame of only 3D linestrings is inferred as "3D LineString"."""
    frame = GeoDataFrame(geometry=[linestring_3D, linestring_3D])
    expected = {"geometry": "3D LineString", "properties": OrderedDict()}
    assert infer_schema(frame) == expected
def test_infer_schema_mixed_3D_Polygon():
    """A 2D plus a 3D polygon yields "3D Polygon" and "Polygon"."""
    frame = GeoDataFrame(geometry=[city_hall_boundaries, polygon_3D])
    expected = {
        "geometry": ["3D Polygon", "Polygon"],
        "properties": OrderedDict(),
    }
    assert infer_schema(frame) == expected
def test_infer_schema_only_3D_Polygons():
    """A frame of only 3D polygons is inferred as "3D Polygon"."""
    frame = GeoDataFrame(geometry=[polygon_3D, polygon_3D])
    expected = {"geometry": "3D Polygon", "properties": OrderedDict()}
    assert infer_schema(frame) == expected
def test_infer_schema_null_geometry_and_2D_point():
    """A missing geometry next to a 2D point leaves just "Point"."""
    frame = GeoDataFrame(geometry=[None, city_hall_entrance])
    # The None entry contributes no geometry type of its own.
    expected = {"geometry": "Point", "properties": OrderedDict()}
    assert infer_schema(frame) == expected
def test_infer_schema_null_geometry_and_3D_point():
    """A missing geometry next to a 3D point leaves just "3D Point"."""
    frame = GeoDataFrame(geometry=[None, point_3D])
    # The None entry contributes no geometry type of its own.
    expected = {"geometry": "3D Point", "properties": OrderedDict()}
    assert infer_schema(frame) == expected
def test_infer_schema_null_geometry_all():
    """A frame with only missing geometries is inferred as "Unknown"."""
    frame = GeoDataFrame(geometry=[None, None])
    # With no geometry type available, the schema falls back to 'Unknown'
    # (default geometry type supported by Fiona).
    expected = {"geometry": "Unknown", "properties": OrderedDict()}
    assert infer_schema(frame) == expected
@pytest.mark.parametrize(
    "array_data,dtype", [([1, 2**31 - 1], np.int32), ([1, np.nan], pd.Int32Dtype())]
)
def test_infer_schema_int32(array_data, dtype):
    """Both parametrized 32-bit integer columns map to the "int32" property type."""
    frame = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
    frame["int32_column"] = pd.array(data=array_data, dtype=dtype)

    expected = {
        "geometry": "Point",
        "properties": OrderedDict([("int32_column", "int32")]),
    }
    assert infer_schema(frame) == expected
def test_infer_schema_int64():
    """A nullable Int64 column maps to the "int" property type."""
    frame = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
    frame["int64_column"] = pd.array([1, np.nan], dtype=pd.Int64Dtype())

    expected = {
        "geometry": "Point",
        "properties": OrderedDict([("int64_column", "int")]),
    }
    assert infer_schema(frame) == expected

View File

@@ -0,0 +1,56 @@
"""
See generate_legacy_storage_files.py for the creation of the legacy files.
"""
import glob
import os
import pathlib
import pandas as pd
import pytest
from geopandas.testing import assert_geodataframe_equal
DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"
@pytest.fixture(scope="module")
def current_pickle_data():
    """Return the pickle test data as built by ``create_pickle_data``."""
    # Imported lazily so the generator script is only loaded when the
    # fixture is actually requested.
    from .generate_legacy_storage_files import create_pickle_data

    data = create_pickle_data()
    return data
# Every legacy pickle file found under the test data directory.
files = glob.glob(str(DATA_PATH / "pickle" / "*.pickle"))


# os.path.basename is used for the ids instead of splitting on "/", so the
# id is the bare file name with whichever path separator the platform uses.
@pytest.fixture(params=files, ids=[os.path.basename(p) for p in files])
def legacy_pickle(request):
    """Parametrized fixture returning the path of one legacy pickle file."""
    return request.param
@pytest.mark.skip(
    reason=(
        "shapely 2.0/pygeos-based unpickling currently only works for "
        "shapely-2.0/pygeos-written files"
    ),
)
def test_legacy_pickles(current_pickle_data, legacy_pickle):
    """Compare each frame in a legacy pickle with its freshly built counterpart."""
    loaded = pd.read_pickle(legacy_pickle)
    for key in loaded:
        # Frames are matched by the key they were stored under.
        assert_geodataframe_equal(loaded[key], current_pickle_data[key])
def test_round_trip_current(tmpdir, current_pickle_data):
    """Pickle every current frame to disk and check it reads back equal."""
    for key, frame in current_pickle_data.items():
        target = str(tmpdir / "{}.pickle".format(key))
        frame.to_pickle(target)

        restored = pd.read_pickle(target)
        assert_geodataframe_equal(restored, frame)
        # has_sindex must still be a plain bool after unpickling.
        assert isinstance(restored.has_sindex, bool)

View File

@@ -0,0 +1,878 @@
"""
Tests here include reading/writing to different types of spatial databases.
The spatial database tests may not work without additional system
configuration. PostGIS tests require a test database to have been set up;
see geopandas.tests.util for more information.
"""
import os
import warnings
from importlib.util import find_spec
import pandas as pd
import geopandas
import geopandas._compat as compat
from geopandas import GeoDataFrame, read_file, read_postgis
from geopandas._compat import HAS_PYPROJ
from geopandas.io.sql import _get_conn as get_conn
from geopandas.io.sql import _write_postgis as write_postgis
import pytest
from geopandas.tests.util import (
create_postgis,
create_spatialite,
mock,
validate_boro_df,
)
try:
from sqlalchemy import text
except ImportError:
# Avoid local imports for text in all sqlalchemy tests
# all tests using text use engine_postgis, which ensures sqlalchemy is available
text = str
@pytest.fixture
def df_nybb(nybb_filename):
    """Read the file given by the ``nybb_filename`` fixture with ``read_file``."""
    return read_file(nybb_filename)
def check_available_postgis_drivers() -> list[str]:
    """Return the names of the installed PostgreSQL drivers.

    Checks ``psycopg`` and ``psycopg2`` (in that order) with ``find_spec``.

    This prevents tests running if the relevant package isn't installed
    (rather than being skipped, as skips are treated as failures during postgis CI)
    """
    candidates = ("psycopg", "psycopg2")
    return [name for name in candidates if find_spec(name)]


# Drivers used to parametrize the postgis fixtures below.
POSTGIS_DRIVERS = check_available_postgis_drivers()
def prepare_database_credentials() -> dict:
    """Build the postgres connection settings.

    The database name is fixed to ``test_geopandas``; user, password, host and
    port come from the ``PGUSER``, ``PGPASSWORD``, ``PGHOST`` and ``PGPORT``
    environment variables and are ``None`` when a variable is unset.
    """
    credentials = {"dbname": "test_geopandas"}
    env_by_key = (
        ("user", "PGUSER"),
        ("password", "PGPASSWORD"),
        ("host", "PGHOST"),
        ("port", "PGPORT"),
    )
    for key, variable in env_by_key:
        credentials[key] = os.environ.get(variable)
    return credentials
@pytest.fixture()
def connection_postgis(request):
    """Yield a postgres connection opened with the driver named by ``request.param``.

    Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS.
    The test is skipped when the driver cannot be imported or the connection
    attempt raises the driver's ``OperationalError``.
    """
    driver = pytest.importorskip(request.param)
    credentials = prepare_database_credentials()
    try:
        connection = driver.connect(**credentials)
    except driver.OperationalError:
        pytest.skip("Cannot connect with postgresql database")

    # Keep pandas' warning about non-SQLAlchemy connections silenced for as
    # long as the test is using the connection.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", message="pandas only supports SQLAlchemy connectable.*"
        )
        yield connection
    connection.close()
@pytest.fixture()
def engine_postgis(request):
    """
    Initiate a sqlalchemy connection engine using either psycopg2 or psycopg.

    Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS.
    The test is skipped when sqlalchemy is not importable or when creating the
    engine / opening a connection raises. The engine is disposed on teardown.
    """
    sqlalchemy = pytest.importorskip("sqlalchemy")
    from sqlalchemy.engine.url import URL

    credentials = prepare_database_credentials()
    try:
        con = sqlalchemy.create_engine(
            URL.create(
                drivername=f"postgresql+{request.param}",
                username=credentials["user"],
                database=credentials["dbname"],
                password=credentials["password"],
                host=credentials["host"],
                port=credentials["port"],
            )
        )
        # Open a connection only to verify the database is reachable, and
        # close it straight away so it is not left checked out.
        with con.connect():
            pass
    except Exception:
        pytest.skip("Cannot connect with postgresql database")
    yield con
    con.dispose()
@pytest.fixture()
def connection_spatialite():
    """
    Return a memory-based SQLite3 connection with SpatiaLite enabled & initialized.

    `The sqlite3 module must be built with loadable extension support
    <https://docs.python.org/3/library/sqlite3.html#f1>`_ and
    `SpatiaLite <https://www.gaia-gis.it/fossil/libspatialite/index>`_
    must be available on the system as a SQLite module.
    Packages available on Anaconda meet requirements.

    Any exception raised during setup causes the test to be skipped, e.g.:

    ``AttributeError`` on missing support for loadable SQLite extensions
    ``sqlite3.OperationalError`` on missing SpatiaLite
    """
    sqlite3 = pytest.importorskip("sqlite3")
    # Bound before the try block so the except handler never touches an
    # undefined name if ``sqlite3.connect`` itself fails.
    con = None
    try:
        con = sqlite3.connect(":memory:")
        with con:
            con.enable_load_extension(True)
            con.load_extension("mod_spatialite")
            con.execute("SELECT InitSpatialMetaData(TRUE)")
    except Exception:
        if con is not None:
            con.close()
        pytest.skip("Cannot setup spatialite database")
    yield con
    con.close()
def drop_table_if_exists(conn_or_engine, table):
    """Drop ``table`` through ``conn_or_engine`` if the inspector reports it exists.

    Skips the calling test when sqlalchemy is not importable.
    """
    sqlalchemy = pytest.importorskip("sqlalchemy")
    if not sqlalchemy.inspect(conn_or_engine).has_table(table):
        return

    metadata = sqlalchemy.MetaData()
    # Silence the reflection warning about the 'geometry' column type.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", message="Did not recognize type 'geometry' of column.*"
        )
        metadata.reflect(conn_or_engine)

    reflected = metadata.tables.get(table)
    if reflected is not None:
        reflected.drop(conn_or_engine, checkfirst=True)
@pytest.fixture
def df_mixed_single_and_multi():
    """GeoDataFrame (epsg:4326) with a LineString, a MultiLineString and a Point."""
    from shapely.geometry import LineString, MultiLineString, Point

    geometries = [
        LineString([(0, 0), (1, 1)]),
        MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]),
        Point(0, 1),
    ]
    return geopandas.GeoDataFrame({"geometry": geometries}, crs="epsg:4326")
@pytest.fixture
def df_geom_collection():
    """GeoDataFrame (epsg:4326) holding one GeometryCollection."""
    from shapely.geometry import GeometryCollection, LineString, Point, Polygon

    # The collection bundles a polygon, a line and a point.
    collection = GeometryCollection(
        [
            Polygon([(0, 0), (1, 1), (0, 1)]),
            LineString([(0, 0), (1, 1)]),
            Point(0, 0),
        ]
    )
    return geopandas.GeoDataFrame({"geometry": [collection]}, crs="epsg:4326")
@pytest.fixture
def df_linear_ring():
    """GeoDataFrame (epsg:4326) holding one LinearRing."""
    from shapely.geometry import LinearRing

    ring = LinearRing(((0, 0), (0, 1), (1, 1), (1, 0)))
    return geopandas.GeoDataFrame({"geometry": [ring]}, crs="epsg:4326")
@pytest.fixture
def df_3D_geoms():
    """GeoDataFrame (epsg:4326) with a 3D LineString, Polygon and Point."""
    from shapely.geometry import LineString, Point, Polygon

    geometries = [
        LineString([(0, 0, 0), (1, 1, 1)]),
        Polygon([(0, 0, 0), (1, 1, 1), (0, 1, 1)]),
        Point(0, 1, 2),
    ]
    return geopandas.GeoDataFrame({"geometry": geometries}, crs="epsg:4326")
class TestIO:
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_get_conn(self, engine_postgis):
    """``get_conn`` yields a Connection for an engine or a connection, and
    raises ValueError for any other object."""
    connection_cls = pytest.importorskip("sqlalchemy.engine.base").Connection

    # Passing the engine itself.
    with get_conn(engine_postgis) as from_engine:
        assert isinstance(from_engine, connection_cls)

    # Passing an already opened connection.
    with engine_postgis.connect() as opened:
        with get_conn(opened) as from_connection:
            assert isinstance(from_connection, connection_cls)

    # Anything else is rejected.
    with pytest.raises(ValueError):
        with get_conn(object()):
            pass
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_default(self, connection_postgis, df_nybb):
    """Read the nybb table with default arguments and check no CRS is set."""
    create_postgis(connection_postgis, df_nybb)

    result = read_postgis("SELECT * FROM nybb;", connection_postgis)

    validate_boro_df(result)
    # no crs defined on the created geodatabase, and none specified
    # by user; should not be set to 0, as from get_srid failure
    assert result.crs is None
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_custom_geom_col(self, connection_postgis, df_nybb):
    """Read a table whose geometry column is named ``the_geom``."""
    geometry_column = "the_geom"
    create_postgis(connection_postgis, df_nybb, geom_col=geometry_column)

    result = read_postgis(
        "SELECT * FROM nybb;", connection_postgis, geom_col=geometry_column
    )
    validate_boro_df(result)
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_select_geom_as(self, connection_postgis, df_nybb):
    """Tests that a SELECT {geom} AS {some_other_geom} works."""
    stored_name = "geom"
    alias = "the_geom"
    create_postgis(connection_postgis, df_nybb, geom_col=stored_name)

    # The geometry column is selected under a different name than it is stored.
    query = """SELECT borocode, boroname, shape_leng, shape_area,
{} as {} FROM nybb;""".format(
        stored_name, alias
    )
    result = read_postgis(query, connection_postgis, geom_col=alias)
    validate_boro_df(result)
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_get_srid(self, connection_postgis, df_nybb):
    """Tests that an SRID can be read from a geodatabase (GH #451)."""
    expected_crs = "epsg:4269"
    reprojected = df_nybb.to_crs(expected_crs)
    create_postgis(connection_postgis, reprojected, srid=4269)

    result = read_postgis("SELECT * FROM nybb;", connection_postgis)

    validate_boro_df(result)
    assert result.crs == expected_crs
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_override_srid(self, connection_postgis, df_nybb):
    """Tests that a user specified CRS overrides the geodatabase SRID."""
    user_crs = df_nybb.crs
    # The table is created with SRID 4269, but the read passes crs=user_crs.
    create_postgis(connection_postgis, df_nybb, srid=4269)

    result = read_postgis("SELECT * FROM nybb;", connection_postgis, crs=user_crs)

    validate_boro_df(result)
    assert result.crs == user_crs
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_from_postgis_default(self, connection_postgis, df_nybb):
    """Read the nybb table through ``GeoDataFrame.from_postgis`` with defaults."""
    create_postgis(connection_postgis, df_nybb)

    result = GeoDataFrame.from_postgis("SELECT * FROM nybb;", connection_postgis)
    validate_boro_df(result, case_sensitive=False)
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_from_postgis_custom_geom_col(self, connection_postgis, df_nybb):
    """``GeoDataFrame.from_postgis`` with a geometry column named ``the_geom``."""
    geometry_column = "the_geom"
    create_postgis(connection_postgis, df_nybb, geom_col=geometry_column)

    result = GeoDataFrame.from_postgis(
        "SELECT * FROM nybb;", connection_postgis, geom_col=geometry_column
    )
    validate_boro_df(result, case_sensitive=False)
def test_read_postgis_null_geom(self, connection_spatialite, df_nybb):
    """Tests that geometry with NULL is accepted."""
    geometry_column = df_nybb.geometry.name
    # Blank out the first geometry before the table is created.
    df_nybb.geometry.iat[0] = None
    create_spatialite(connection_spatialite, df_nybb)

    query = (
        "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, "
        'AsEWKB("{0}") AS "{0}" FROM nybb'.format(geometry_column)
    )
    result = read_postgis(query, connection_spatialite, geom_col=geometry_column)
    validate_boro_df(result)
def test_read_postgis_binary(self, connection_spatialite, df_nybb):
    """Tests that geometry read as binary is accepted."""
    geometry_column = df_nybb.geometry.name
    create_spatialite(connection_spatialite, df_nybb)

    # The geometry is selected through ST_AsBinary.
    query = (
        "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, "
        'ST_AsBinary("{0}") AS "{0}" FROM nybb'.format(geometry_column)
    )
    result = read_postgis(query, connection_spatialite, geom_col=geometry_column)
    validate_boro_df(result)
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_chunksize(self, connection_postgis, df_nybb):
    """Test chunksize argument"""
    create_postgis(connection_postgis, df_nybb)

    # Read in chunks of two rows and stitch the chunks back together.
    chunks = read_postgis("SELECT * FROM nybb;", connection_postgis, chunksize=2)
    result = pd.concat(chunks)

    validate_boro_df(result)
    # no crs defined on the created geodatabase, and none specified
    # by user; should not be set to 0, as from get_srid failure
    assert result.crs is None
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_default(self, engine_postgis, df_nybb):
    """Tests that GeoDataFrame can be written to PostGIS with defaults."""
    table_name = "nybb"

    # Start from a clean slate so that if_exists="fail" can succeed.
    drop_table_if_exists(engine_postgis, table_name)
    write_postgis(df_nybb, con=engine_postgis, name=table_name, if_exists="fail")

    # Read the table back and validate it.
    query = text("SELECT * FROM {table};".format(table=table_name))
    result = read_postgis(query, engine_postgis, geom_col="geometry")
    validate_boro_df(result)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_uppercase_tablename(self, engine_postgis, df_nybb):
    """Tests writing GeoDataFrame to PostGIS with uppercase tablename."""
    table_name = "aTestTable"

    # Start from a clean slate so that if_exists="fail" can succeed.
    drop_table_if_exists(engine_postgis, table_name)
    write_postgis(df_nybb, con=engine_postgis, name=table_name, if_exists="fail")

    # The mixed-case table name is double-quoted in the query.
    query = text('SELECT * FROM "{table}";'.format(table=table_name))
    result = read_postgis(query, engine_postgis, geom_col="geometry")
    validate_boro_df(result)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_sqlalchemy_connection(self, engine_postgis, df_nybb):
    """Tests that a GeoDataFrame can be written through a SQLAlchemy connection."""
    table_name = "nybb_con"
    query = text("SELECT * FROM {table};".format(table=table_name))

    # Drop, write and read back all go through the connection from begin().
    with engine_postgis.begin() as connection:
        drop_table_if_exists(connection, table_name)
        write_postgis(df_nybb, con=connection, name=table_name, if_exists="fail")

        result = read_postgis(query, connection, geom_col="geometry")
        validate_boro_df(result)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_fail_when_table_exists(self, engine_postgis, df_nybb):
    """
    Tests that uploading the same table raises error when: if_exists='fail'.
    """
    engine = engine_postgis
    table = "nybb"

    # Ensure table exists
    write_postgis(df_nybb, con=engine, name=table, if_exists="replace")

    # pytest.raises makes the test fail when no error is raised at all;
    # a bare try/except would pass silently in that case.
    with pytest.raises(ValueError, match="already exists"):
        write_postgis(df_nybb, con=engine, name=table, if_exists="fail")
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_replace_when_table_exists(self, engine_postgis, df_nybb):
    """
    Tests that an existing table can be overwritten with if_exists='replace'.
    """
    table_name = "nybb"

    # The first write guarantees the table exists, the second overwrites it.
    for _ in range(2):
        write_postgis(
            df_nybb, con=engine_postgis, name=table_name, if_exists="replace"
        )

    query = text("SELECT * FROM {table};".format(table=table_name))
    result = read_postgis(query, engine_postgis, geom_col="geometry")
    validate_boro_df(result)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_append_when_table_exists(self, engine_postgis, df_nybb):
    """
    Tests that appending to existing table produces correct results when:
    if_exists='append'.
    """
    engine = engine_postgis
    table = "nybb"

    orig_rows, orig_cols = df_nybb.shape
    write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
    write_postgis(df_nybb, con=engine, name=table, if_exists="append")

    # Validate
    sql = text("SELECT * FROM {table};".format(table=table))
    df = read_postgis(sql, engine, geom_col="geometry")
    new_rows, new_cols = df.shape

    # The assertion messages are plain strings: a trailing comma inside the
    # parentheses would turn them into 1-tuples.
    # There should be twice as many rows in the new table
    assert new_rows == orig_rows * 2, (
        "There should be {target} rows,found: {current}".format(
            target=orig_rows * 2, current=new_rows
        )
    )
    # Number of columns should stay the same
    assert new_cols == orig_cols, (
        "There should be {target} columns,found: {current}".format(
            target=orig_cols, current=new_cols
        )
    )
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_without_crs(self, engine_postgis, df_nybb):
    """
    Tests that GeoDataFrame can be written to PostGIS without CRS information.
    """
    table_name = "nybb"

    # Remove the CRS; writing is then expected to warn.
    df_nybb.geometry.array.crs = None
    with pytest.warns(UserWarning, match="Could not parse CRS from the GeoDataF"):
        write_postgis(
            df_nybb, con=engine_postgis, name=table_name, if_exists="replace"
        )

    # Validate that the SRID recorded for the table is 0.
    query = text(
        "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
            schema="public", table=table_name, geom_col="geometry"
        )
    )
    with engine_postgis.connect() as connection:
        target_srid = connection.execute(query).fetchone()[0]
    assert target_srid == 0, "SRID should be 0, found %s" % target_srid
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_with_esri_authority(self, engine_postgis, df_nybb):
    """
    Tests that GeoDataFrame can be written to PostGIS with ESRI Authority
    CRS information (GH #2414).
    """
    table_name = "nybb"

    # Reproject to an ESRI-authority CRS and write the result.
    esri_frame = df_nybb.to_crs("ESRI:102003")
    write_postgis(
        esri_frame, con=engine_postgis, name=table_name, if_exists="replace"
    )

    # Validate that the SRID recorded for the table is 102003.
    query = text(
        "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
            schema="public", table=table_name, geom_col="geometry"
        )
    )
    with engine_postgis.connect() as connection:
        target_srid = connection.execute(query).fetchone()[0]
    assert target_srid == 102003, "SRID should be 102003, found %s" % target_srid
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_geometry_collection(
    self, engine_postgis, df_geom_collection
):
    """
    Tests that a GeometryCollection can be written and read back.
    """
    table_name = "geomtype_tests"
    write_postgis(
        df_geom_collection, con=engine_postgis, name=table_name, if_exists="replace"
    )

    # Geometry type as reported by the database.
    type_query = text(
        "SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format(
            table=table_name
        )
    )
    with engine_postgis.connect() as connection:
        db_geom_type = connection.execute(type_query).fetchone()[0]

    # Geometry type as seen after reading the table back.
    select_query = text("SELECT * FROM {table};".format(table=table_name))
    result = read_postgis(select_query, engine_postgis, geom_col="geometry")

    assert db_geom_type.upper() == "GEOMETRYCOLLECTION"
    assert result.geom_type.unique()[0] == "GeometryCollection"
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_mixed_geometry_types(
    self, engine_postgis, df_mixed_single_and_multi
):
    """
    Tests that writing a mix of single and MultiGeometries is possible.
    """
    table_name = "geomtype_tests"
    write_postgis(
        df_mixed_single_and_multi,
        con=engine_postgis,
        name=table_name,
        if_exists="replace",
    )

    # Distinct geometry types, ordered by name.
    type_query = text(
        "SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format(
            table=table_name
        )
    )
    with engine_postgis.connect() as connection:
        rows = connection.execute(type_query).fetchall()

    assert rows[0][0].upper() == "LINESTRING"
    assert rows[1][0].upper() == "MULTILINESTRING"
    assert rows[2][0].upper() == "POINT"
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_linear_ring(self, engine_postgis, df_linear_ring):
    """
    Tests that a LinearRing can be written; the database reports it as LINESTRING.
    """
    table_name = "geomtype_tests"
    write_postgis(
        df_linear_ring, con=engine_postgis, name=table_name, if_exists="replace"
    )

    # Geometry type as reported by the database.
    type_query = text(
        "SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format(
            table=table_name
        )
    )
    with engine_postgis.connect() as connection:
        db_geom_type = connection.execute(type_query).fetchone()[0]

    assert db_geom_type.upper() == "LINESTRING"
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_in_chunks(self, engine_postgis, df_mixed_single_and_multi):
    """
    Tests that writing with chunksize=1 stores every row and geometry type.
    """
    table_name = "geomtype_tests"
    write_postgis(
        df_mixed_single_and_multi,
        con=engine_postgis,
        name=table_name,
        if_exists="replace",
        chunksize=1,
    )

    # All three rows must have arrived.
    count_query = text("SELECT COUNT(geometry) FROM {table};".format(table=table_name))
    with engine_postgis.connect() as connection:
        row_count = connection.execute(count_query).fetchone()[0]
    assert row_count == 3

    # Distinct geometry types, ordered by name.
    type_query = text(
        "SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format(
            table=table_name
        )
    )
    with engine_postgis.connect() as connection:
        rows = connection.execute(type_query).fetchall()
    assert rows[0][0].upper() == "LINESTRING"
    assert rows[1][0].upper() == "MULTILINESTRING"
    assert rows[2][0].upper() == "POINT"
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_to_different_schema(self, engine_postgis, df_nybb):
    """
    Tests writing data to alternative schema.
    """
    table_name = "nybb"
    target_schema = "test"

    # Make sure the target schema is present before writing into it.
    create_schema = text(
        "CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=target_schema)
    )
    with engine_postgis.begin() as connection:
        connection.execute(create_schema)

    write_postgis(
        df_nybb,
        con=engine_postgis,
        name=table_name,
        if_exists="replace",
        schema=target_schema,
    )

    # Read back through the schema-qualified name and validate.
    query = text(
        "SELECT * FROM {schema}.{table};".format(
            schema=target_schema, table=table_name
        )
    )
    result = read_postgis(query, engine_postgis, geom_col="geometry")
    validate_boro_df(result)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_to_different_schema_when_table_exists(
    self, engine_postgis, df_nybb
):
    """
    Tests writing to an alternative schema with if_exists='fail' (a ValueError
    is tolerated) and then with if_exists='replace'.
    """
    table_name = "nybb"
    target_schema = "test"

    # Make sure the target schema is present before writing into it.
    create_schema = text(
        "CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=target_schema)
    )
    with engine_postgis.begin() as connection:
        connection.execute(create_schema)

    try:
        write_postgis(
            df_nybb,
            con=engine_postgis,
            name=table_name,
            if_exists="fail",
            schema=target_schema,
        )
        # The write went through, so validate what was written.
        first_query = text(
            "SELECT * FROM {schema}.{table};".format(
                schema=target_schema, table=table_name
            )
        )
        first_result = read_postgis(first_query, engine_postgis, geom_col="geometry")
        validate_boro_df(first_result)
    except ValueError:
        # Should raise a ValueError when table exists
        pass

    # Try with replace flag on
    write_postgis(
        df_nybb,
        con=engine_postgis,
        name=table_name,
        if_exists="replace",
        schema=target_schema,
    )

    # Validate
    second_query = text(
        "SELECT * FROM {schema}.{table};".format(
            schema=target_schema, table=table_name
        )
    )
    second_result = read_postgis(second_query, engine_postgis, geom_col="geometry")
    validate_boro_df(second_result)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_3D_geometries(self, engine_postgis, df_3D_geoms):
    """
    Tests that geometries with 3 dimensions keep their Z after a write/read.
    """
    table_name = "geomtype_tests"
    write_postgis(
        df_3D_geoms, con=engine_postgis, name=table_name, if_exists="replace"
    )

    # Every geometry read back must still report a Z coordinate.
    query = text("SELECT * FROM {table};".format(table=table_name))
    result = read_postgis(query, engine_postgis, geom_col="geometry")
    assert list(result.geometry.has_z) == [True, True, True]
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_row_order(self, engine_postgis, df_nybb):
    """
    Tests that the row order in db table follows the order of the original frame.
    """
    table_name = "row_order_test"
    expected_codes = df_nybb["BoroCode"].tolist()

    write_postgis(df_nybb, con=engine_postgis, name=table_name, if_exists="replace")

    # The BoroCode sequence read back must match the one that was written.
    query = text("SELECT * FROM {table};".format(table=table_name))
    result = read_postgis(query, engine_postgis, geom_col="geometry")
    assert result["BoroCode"].tolist() == expected_codes
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_append_before_table_exists(self, engine_postgis, df_nybb):
    """
    Tests that insert works with if_exists='append' when table does not exist yet.
    """
    table_name = "nybb"

    # Make sure the table is absent before appending to it.
    drop_table_if_exists(engine_postgis, table_name)
    write_postgis(df_nybb, con=engine_postgis, name=table_name, if_exists="append")

    # Read the table back and validate its contents.
    query = text("SELECT * FROM {table};".format(table=table_name))
    result = read_postgis(query, engine_postgis, geom_col="geometry")
    validate_boro_df(result)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_append_with_different_crs(self, engine_postgis, df_nybb):
    """
    Tests that a ValueError is raised if the table CRS differs from the frame.
    """
    table_name = "nybb"
    write_postgis(df_nybb, con=engine_postgis, name=table_name, if_exists="replace")

    # Appending a frame reprojected to EPSG:4326 must be rejected.
    reprojected = df_nybb.to_crs(epsg=4326)
    with pytest.raises(ValueError, match="CRS of the target table"):
        write_postgis(
            reprojected, con=engine_postgis, name=table_name, if_exists="append"
        )
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_append_without_crs(self, engine_postgis, df_nybb):
    """Write a frame without CRS, then append the same CRS-less frame to it."""
    # This test was included in #3328 when the default value for no
    # CRS was changed from an SRID of -1 to 0. This resolves issues
    # of appending dataframes to postgis that have no CRS as postgis
    # no CRS value is 0.
    table_name = "nybb"
    no_crs_frame = df_nybb.set_crs(None, allow_override=True)

    write_postgis(
        no_crs_frame, con=engine_postgis, name=table_name, if_exists="replace"
    )
    # append another dataframe with no crs
    write_postgis(
        no_crs_frame, con=engine_postgis, name=table_name, if_exists="append"
    )
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
@pytest.mark.xfail(
    compat.PANDAS_GE_20 and not compat.PANDAS_GE_202,
    reason="Duplicate columns are dropped in read_sql with pandas 2.0.0 and 2.0.1",
)
def test_duplicate_geometry_column_fails(self, engine_postgis):
    """
    Tests that a ValueError is raised if an SQL query returns two geometry columns.
    """
    # Both selected columns are named "geom".
    query = "select ST_MakePoint(0, 0) as geom, ST_MakePoint(0, 0) as geom;"

    with pytest.raises(ValueError):
        read_postgis(query, engine_postgis, geom_col="geom")
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_non_epsg_crs(self, connection_postgis, df_nybb):
    """A table stored with SRID 54052 reads back with CRS ``ESRI:54052``."""
    esri_frame = df_nybb.to_crs(crs="esri:54052")
    create_postgis(connection_postgis, esri_frame, srid=54052)

    result = read_postgis("SELECT * FROM nybb;", connection_postgis)

    validate_boro_df(result)
    assert result.crs == "ESRI:54052"
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
@mock.patch("shapely.get_srid")
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_srid_not_in_table(self, mock_get_srid, connection_postgis, df_nybb):
    """Reading with a mocked SRID of 99999 warns and then raises ``CRSError``."""
    # mock a non-existent srid for edge case if shapely has an srid
    # not present in postgis table.
    pyproj = pytest.importorskip("pyproj")
    mock_get_srid.return_value = 99999

    wgs84_frame = df_nybb.to_crs(crs="epsg:4326")
    create_postgis(connection_postgis, wgs84_frame)

    query = "SELECT * FROM nybb;"
    with pytest.raises(pyproj.exceptions.CRSError, match="crs not found"):
        with pytest.warns(UserWarning, match="Could not find srid 99999"):
            read_postgis(query, connection_postgis)
@mock.patch("geopandas.io.sql._get_spatial_ref_sys_df")
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_no_spatial_ref_sys_table_in_postgis(
    self, mock_get_spatial_ref_sys_df, connection_postgis, df_nybb
):
    """With the spatial_ref_sys lookup failing, reading warns and the CRS is
    still ``EPSG:4326``."""
    # mock for a non-existent spatial_ref_sys database
    mock_get_spatial_ref_sys_df.side_effect = pd.errors.DatabaseError

    wgs84_frame = df_nybb.to_crs(crs="epsg:4326")
    create_postgis(connection_postgis, wgs84_frame, srid=4326)

    query = "SELECT * FROM nybb;"
    with pytest.warns(
        UserWarning, match="Could not find the spatial reference system table"
    ):
        result = read_postgis(query, connection_postgis)
    assert result.crs == "EPSG:4326"
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_non_epsg_crs_chunksize(self, connection_postgis, df_nybb):
    """Test chunksize argument with non epsg crs"""
    esri_frame = df_nybb.to_crs(crs="esri:54052")
    create_postgis(connection_postgis, esri_frame, srid=54052)

    # Read in chunks of two rows and stitch the chunks back together.
    chunks = read_postgis("SELECT * FROM nybb;", connection_postgis, chunksize=2)
    result = pd.concat(chunks)

    validate_boro_df(result)
    assert result.crs == "ESRI:54052"