Skip to content

Commit

Permalink
GH-45433: [Python] Remove Cython workarounds
Browse files Browse the repository at this point in the history
We've accumulated multiple workarounds for various Cython issues over the years.

Now that we require at least Cython 3.0, we can remove some of those.
  • Loading branch information
pitrou committed Feb 5, 2025
1 parent 0556905 commit c6fb259
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 117 deletions.
7 changes: 0 additions & 7 deletions python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,7 @@ def parse_git(root, **kwargs):
except ImportError:
__version__ = None

# ARROW-8684: Disable GC while initializing the Cython extension module,
# to work around the Cython bug in https://github.com/cython/cython/issues/3603
_gc_enabled = _gc.isenabled()
_gc.disable()
import pyarrow.lib as _lib
if _gc_enabled:
_gc.enable()

from pyarrow.lib import (BuildInfo, RuntimeInfo, set_timezone_db_path,
MonthDayNano, VersionInfo, cpp_build_info,
cpp_version, cpp_version_info, runtime_info,
Expand Down
4 changes: 2 additions & 2 deletions python/pyarrow/_flight.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2460,8 +2460,8 @@ cdef dict convert_headers(const CCallHeaders& c_headers):
CCallHeaders.const_iterator header_iter = c_headers.cbegin()
headers = {}
while header_iter != c_headers.cend():
header = c_string(deref(header_iter).first).decode("ascii")
value = c_string(deref(header_iter).second)
header = to_string(deref(header_iter).first).decode("ascii")
value = to_string(deref(header_iter).second)
if not header.endswith("-bin"):
# Text header values in gRPC (and HTTP/1, HTTP/2) are
# required to be valid ASCII. Binary header values are
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/_parquet_encryption.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from datetime import timedelta

from cython.operator cimport dereference as deref
from libcpp.memory cimport shared_ptr

from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.lib cimport _Weakrefable
Expand Down
74 changes: 24 additions & 50 deletions python/pyarrow/includes/common.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,14 @@
# distutils: language = c++

from libc.stdint cimport *

from libcpp cimport bool as c_bool, nullptr
from libcpp.functional cimport function
from libcpp.memory cimport shared_ptr, unique_ptr, make_shared
from libcpp.memory cimport (shared_ptr, unique_ptr, make_shared,
static_pointer_cast, dynamic_pointer_cast)
from libcpp.optional cimport nullopt, optional
from libcpp.string cimport string as c_string
from libcpp.utility cimport pair
from libcpp.utility cimport move, pair
from libcpp.vector cimport vector
from libcpp.unordered_map cimport unordered_map
from libcpp.unordered_set cimport unordered_set
Expand All @@ -32,54 +35,27 @@ from cpython.datetime cimport PyDateTime_DateTime
cimport cpython


cdef extern from * namespace "std" nogil:
cdef shared_ptr[T] static_pointer_cast[T, U](shared_ptr[U])


cdef extern from "<optional>" namespace "std" nogil:
cdef cppclass optional[T]:
ctypedef T value_type
optional()
optional(nullopt_t)
optional(optional&) except +
optional(T&) except +
c_bool has_value()
T& value()
T& value_or[U](U& default_value)
void swap(optional&)
void reset()
T& emplace(...)
T& operator*()
# T* operator->() # Not Supported
optional& operator=(optional&)
optional& operator=[U](U&)
cdef extern from "<string_view>" namespace "std" nogil:
# Needed until https://github.com/cython/cython/issues/6651 is fixed
cdef cppclass cpp_string_view "std::string_view":
string_view()
string_view(const char*)
string_view(c_string&)
size_t size()
bint empty()
const char* data()


# vendored from the cymove project https://github.com/ozars/cymove
cdef extern from * namespace "cymove" nogil:
"""
#include <type_traits>
#include <utility>
namespace cymove {
template <typename T>
inline typename std::remove_reference<T>::type&& cymove(T& t) {
return std::move(t);
}
template <typename T>
inline typename std::remove_reference<T>::type&& cymove(T&& t) {
return std::move(t);
}
} // namespace cymove
"""
cdef T move" cymove::cymove"[T](T)

cdef extern from * namespace "arrow::py" nogil:
"""
#include <memory>
#include <string>
#include <string_view>
#include <utility>
namespace arrow {
namespace py {
template <typename T>
std::shared_ptr<T> to_shared(std::unique_ptr<T>& t) {
return std::move(t);
Expand All @@ -88,10 +64,17 @@ cdef extern from * namespace "arrow::py" nogil:
std::shared_ptr<T> to_shared(std::unique_ptr<T>&& t) {
return std::move(t);
}
// Needed until https://github.com/cython/cython/issues/6651 is fixed
inline std::string to_string(std::string_view s) {
return std::string(s);
}
} // namespace py
} // namespace arrow
"""
cdef shared_ptr[T] to_shared" arrow::py::to_shared"[T](unique_ptr[T])
cdef c_string to_string(cpp_string_view s)

cdef extern from "arrow/python/platform.h":
pass
Expand Down Expand Up @@ -173,12 +156,3 @@ cdef inline object PyObject_to_object(PyObject* o):
cdef object result = <object> o
cpython.Py_DECREF(result)
return result


cdef extern from "<string_view>" namespace "std" nogil:
cdef cppclass cpp_string_view "std::string_view":
string_view()
string_view(const char*)
size_t size()
bint empty()
const char* data()
2 changes: 1 addition & 1 deletion python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -1078,7 +1078,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
shared_ptr[CRecordBatch] batch
# The struct in C++ does not actually declare this member with any `const`
# qualifier, but adding `const` here gets Cython to not complain
const shared_ptr[const CKeyValueMetadata] custom_metadata
shared_ptr[const CKeyValueMetadata] custom_metadata

cdef cppclass CTable" arrow::Table":
CTable(const shared_ptr[CSchema]& schema,
Expand Down
18 changes: 4 additions & 14 deletions python/pyarrow/includes/libarrow_flight.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_python cimport CTimePoint

from libcpp.map cimport multimap


cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:
cdef char* CTracingServerMiddlewareName\
Expand Down Expand Up @@ -311,20 +313,8 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:
cdef cppclass CCallInfo" arrow::flight::CallInfo":
CFlightMethod method

# This is really std::unordered_multimap, but Cython has no
# bindings for it, so treat it as an opaque class and bind the
# methods we need
cdef cppclass CCallHeaders" arrow::flight::CallHeaders":
cppclass const_iterator:
pair[c_string, c_string] operator*()
# For Cython < 3
const_iterator operator++()
# For Cython >= 3
const_iterator operator++(int)
bint operator==(const_iterator)
bint operator!=(const_iterator)
const_iterator cbegin()
const_iterator cend()
ctypedef multimap[cpp_string_view, cpp_string_view] CCallHeaders\
" arrow::flight::CallHeaders"

cdef cppclass CAddCallHeaders" arrow::flight::AddCallHeaders":
void AddHeader(const c_string& key, const c_string& value)
Expand Down
5 changes: 2 additions & 3 deletions python/pyarrow/io.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

from libc.stdlib cimport malloc, free

from cpython.bytearray cimport PyByteArray_FromStringAndSize

import codecs
import pickle
import re
Expand All @@ -43,9 +45,6 @@ cdef extern from "Python.h":
PyObject* PyBytes_FromStringAndSizeNative" PyBytes_FromStringAndSize"(
char *v, Py_ssize_t len) except NULL

# Workaround https://github.com/cython/cython/issues/4707
bytearray PyByteArray_FromStringAndSize(char *string, Py_ssize_t len)


def have_libhdfs():
"""
Expand Down
44 changes: 5 additions & 39 deletions python/pyarrow/lib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -18,51 +18,17 @@
# cython: language_level = 3

from cpython cimport PyObject
from cpython.slice cimport PySlice_Check

from libcpp cimport nullptr, bool as c_bool
from libcpp.cast cimport dynamic_cast
from libcpp.memory cimport dynamic_pointer_cast
from libcpp.memory cimport static_pointer_cast, dynamic_pointer_cast
from libcpp.utility cimport move

from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_python cimport *

# Will be available in Cython 3, not backported
# ref: https://github.com/cython/cython/issues/3293#issuecomment-1223058101
cdef extern from "<optional>" namespace "std" nogil:
cdef cppclass nullopt_t:
nullopt_t()

cdef nullopt_t nullopt

cdef cppclass optional[T]:
ctypedef T value_type
optional()
optional(nullopt_t)
optional(optional&) except +
optional(T&) except +
c_bool has_value()
T& value()
T& value_or[U](U& default_value)
void swap(optional&)
void reset()
T& emplace(...)
T& operator*()
# T* operator->() # Not Supported
optional& operator=(optional&)
optional& operator=[U](U&)
c_bool operator bool()
c_bool operator!()
c_bool operator==[U](optional&, U&)
c_bool operator!=[U](optional&, U&)
c_bool operator<[U](optional&, U&)
c_bool operator>[U](optional&, U&)
c_bool operator<=[U](optional&, U&)
c_bool operator>=[U](optional&, U&)

optional[T] make_optional[T](...) except +

cdef extern from "Python.h":
int PySlice_Check(object)


cdef int check_status(const CStatus& status) except -1 nogil
cdef object convert_status(const CStatus& status)
Expand Down

0 comments on commit c6fb259

Please sign in to comment.