"""
Helper module to convert user inputs into normalized forms.
"""
from __future__ import absolute_import
from typing import Iterable, Optional, Tuple, Union
import pandas as pd
import pyarrow as pa
# Public API of this module: the names exported by ``from <module> import *``.
__all__ = (
"converter_str",
"converter_str_set",
"converter_str_set_optional",
"converter_str_tupleset",
"converter_tuple",
"get_str_to_python_converter",
)
def converter_str_set(obj) -> frozenset:
    """
    Normalize the input into a frozen set of unicode strings.

    ``None`` yields an empty set. A single ``str``/``bytes`` value yields a
    one-element set.

    Parameters
    ----------
    obj: Optional[Union[Iterable[str], str]]
        Value to normalize.

    Returns
    -------
    obj: FrozenSet[str]
        The distinct strings contained in ``obj``.

    Raises
    ------
    TypeError
        If an element is neither ``str`` nor ``bytes``.
    """
    # ``converter_tuple`` handles ``None`` and scalar inputs; ``converter_str``
    # validates each element and decodes bytes.
    return frozenset(converter_str(element) for element in converter_tuple(obj))
def converter_str_set_optional(obj):
    """
    Normalize the input into a frozen set of unicode strings, keeping ``None``.

    Unlike :func:`converter_str_set`, ``None`` is passed through unchanged
    instead of being turned into an empty set.

    Parameters
    ----------
    obj: Optional[Union[Iterable[str], str]]
        Value to normalize.

    Returns
    -------
    obj: Optional[FrozenSet[str]]
        ``None`` if ``obj`` is ``None``, otherwise the set of strings.

    Raises
    ------
    TypeError
        If an element in the passed object is neither ``str`` nor ``bytes``
        (raised by the underlying string conversion).
    """
    return None if obj is None else converter_str_set(obj)
def converter_str_tupleset(obj: Optional[Union[Iterable[str], str]]) -> Tuple[str, ...]:
    """
    Normalize the input into a tuple of unique unicode strings.

    ``None`` will be converted to an empty tuple. The order of the input is
    kept, and the input must not contain duplicate entries.

    Parameters
    ----------
    obj
        Value to normalize.

    Returns
    -------
    obj: Tuple[str, ...]
        The strings of ``obj`` in their original order.

    Raises
    ------
    TypeError
        If an element is neither ``str`` nor ``bytes``, or if ``obj`` is a
        ``dict``, ``set`` or ``frozenset``, which are rejected as having an
        unstable iteration order.
    ValueError
        If the input contains duplicates.
    """
    unordered_types = (dict, frozenset, set)
    if isinstance(obj, unordered_types):
        message = "{obj} which has type {tname} has an unstable iteration order"
        raise TypeError(message.format(obj=obj, tname=type(obj).__name__))

    strings = tuple(converter_str(item) for item in converter_tuple(obj))

    # A set collapses equal entries, so a size mismatch reveals duplicates.
    if len(strings) != len(set(strings)):
        raise ValueError("Tuple-set contains duplicates: {}".format(", ".join(strings)))
    return strings
def converter_tuple(obj) -> tuple:
    """
    Convert input to a tuple. ``None`` will be converted to an empty tuple.

    Strings and bytes are treated as scalars (wrapped in a one-element tuple)
    even though they are iterable. Any other object exposing ``__iter__`` is
    expanded element by element; everything else is wrapped as a single
    element.

    Parameters
    ----------
    obj: Any
        Object to convert.

    Returns
    -------
    obj: Tuple[Any]
        Tuple.
    """
    if obj is None:
        return ()
    # str/bytes are iterable but must stay whole rather than be split into
    # characters/bytes.
    if isinstance(obj, (str, bytes)) or not hasattr(obj, "__iter__"):
        return (obj,)
    # ``tuple`` consumes any iterable directly; no generator wrapper needed.
    return tuple(obj)
def converter_str(obj) -> str:
    """
    Make sure the input is a unicode string.

    ``str`` values are returned unchanged; ``bytes`` values are decoded as
    UTF-8.

    Parameters
    ----------
    obj: str
        Object to convert.

    Returns
    -------
    obj: str
        String.

    Raises
    ------
    TypeError
        If passed object is neither ``str`` nor ``bytes``.
    UnicodeDecodeError
        If passed bytes are not valid UTF-8.
    """
    if isinstance(obj, bytes):
        return obj.decode("utf-8")
    if not isinstance(obj, str):
        raise TypeError(
            "Object of type {type} is not a string: {obj}".format(
                type=type(obj).__name__, obj=obj
            )
        )
    return obj
def get_str_to_python_converter(pa_type):
    """
    Look up a callable that parses a string into a python object.

    Supported type families are boolean, floating point, integer, string and
    timestamp.

    Parameters
    ----------
    pa_type: pyarrow.DataType
        Data type the parsed value should correspond to.

    Returns
    -------
    converter: Callable[[str], Any]
        Converter. For booleans, a case-insensitive parser of common
        true/false spellings; for strings, a function that strips one pair of
        matching surrounding quotes; otherwise ``float``, ``int`` or
        ``pandas.Timestamp``.

    Raises
    ------
    ValueError
        If ``pa_type`` is not one of the supported type families. The returned
        boolean converter also raises ``ValueError`` on unrecognized input.
    """
    arrow_types = pa.types

    if arrow_types.is_boolean(pa_type):
        falsy = ("0", "f", "n", "false", "no")
        truthy = ("1", "t", "y", "true", "yes")

        def _parse_bool(x):
            # Matching is case-insensitive.
            lowered = x.lower()
            if lowered in falsy:
                return False
            if lowered in truthy:
                return True
            raise ValueError("Cannot parse bool: {}".format(x))

        return _parse_bool

    if arrow_types.is_floating(pa_type):
        return float

    if arrow_types.is_integer(pa_type):
        return int

    if arrow_types.is_string(pa_type):
        quote_chars = ('"', "'")

        def _strip_quotes(x):
            # Drop one pair of matching surrounding quotes. The length check
            # keeps a lone quote character from being treated as a quoted
            # empty string.
            is_quoted = len(x) > 1 and any(
                x.startswith(quote) and x.endswith(quote) for quote in quote_chars
            )
            return x[1:-1] if is_quoted else x

        return _strip_quotes

    if arrow_types.is_timestamp(pa_type):
        return pd.Timestamp

    raise ValueError("Cannot handle type {pa_type}".format(pa_type=pa_type))