"""
Reusable value and type casting transformations
This module is stable on a "best effort"-basis, and small backwards incompatibilities
can be introduced in "minor"/"patch" version bumps (it will not be considered a
regression automatically).
While it can be used by plugin writers, it is not intended for general public use.
.. testsetup:: *
# workaround for missing import in sphinx-doctest
from ini2toml.transformations import *
"""
import warnings
from collections.abc import MutableMapping
from functools import reduce, wraps
from typing import (
Any,
Callable,
List,
Optional,
Sequence,
Tuple,
TypeVar,
Union,
cast,
overload,
)
from .types import Commented, CommentedKV, CommentedList
# Comment markers used as the default value of every ``comment_prefixes`` argument
# in this module.
CP = ("#", ";")
"""Default Comment Prefixes"""
# Generic placeholders for the annotations below; S, T, U, V, X and Y are the ones
# chained by the ``pipe`` overloads (output of one function = input of the next).
S = TypeVar("S")
T = TypeVar("T")
U = TypeVar("U")
V = TypeVar("V")
X = TypeVar("X")
Y = TypeVar("Y")
# Placeholder restricted to mutable mappings (used by ``Transformation``).
M = TypeVar("M", bound=MutableMapping)
# A ``(key, value)`` pair whose key is always a string.
KV = Tuple[str, T]
# Any single-argument function taking an ``X`` and returning a ``Y``.
FN = Callable[[X], Y]
Scalar = Union[int, float, bool, str] # TODO: missing time and datetime
"""Simple data types with TOML correspondence"""
CoerceFn = Callable[[str], T]
"""Functions that know how to parse/coerce string values into different types"""
# Either a callable that receives a string (and may return anything) or a callable
# that receives a mutable mapping and returns a mapping of the same type.
Transformation = Union[Callable[[str], Any], Callable[[M], M]]
"""There are 2 main types of transformation:
- The first one is a simple transformation that processes a string value (coming from an
option in the original CFG/INI file) into a value with an equivalent TOML data type.
For example: transforming ``"2"`` (string) into ``2`` (integer).
- The second one tries to preserve metadata (such as comments) from the original CFG/INI
file. This kind of transformation processes a string value into an intermediary
representation (e.g. :obj:`Commented`, :obj:`CommentedList`, :obj:`CommentedKV`)
that needs to be properly handled before adding to the TOML document.
In a higher level we can also consider an ensemble of transformations that transform an
entire table of the TOML document.
"""
# Type variable bound to ``Transformation``; ``deprecated`` uses it so that the
# wrapper it returns is typed exactly like the function it wraps.
TF = TypeVar("TF", bound=Transformation)
# ---- Simple value processors ----
# These functions return plain objects, that can be directly added to the TOML document
[docs]
def noop(x: T) -> T:
    """Identity transformation: hand back ``x`` exactly as it was received."""
    return x
[docs]
def is_true(value: str) -> bool:
    """Tell whether ``value`` spells an affirmative flag (letter case is ignored).

    Same as ``value.lower() in ("true", "1", "yes", "on")``.
    """
    affirmative = ("true", "1", "yes", "on")
    return value.lower() in affirmative
[docs]
def is_false(value: str) -> bool:
    """Tell whether ``value`` spells a negative flag (letter case is ignored).

    Same as
    ``value.lower() in ("false", "0", "no", "off", "none", "null", "nil")``.
    """
    negative = ("false", "0", "no", "off", "none", "null", "nil")
    return value.lower() in negative
[docs]
def is_float(value: str) -> bool:
    """Tell whether ``value`` looks like a float **and** :class:`float` accepts it.

    The text is considered a float candidate when, after dropping leading signs,
    the dot and underscores, only decimal digits remain (with at most one dot in
    the original text), or when what remains is ``inf``/``nan`` (case-insensitive).
    Exponent notation (e.g. ``1e5``) is not recognised by this heuristic.

    Candidates are then confirmed with ``float(value)``, so malformed inputs that
    merely resemble numbers (``"--1"``, ``"1_.0"``, ``"i.nf"``) are rejected.
    This guarantees that callers can convert the value without a ``ValueError``.
    """
    cleaned = value.lower().lstrip("+-").replace(".", "").replace("_", "")
    looks_numeric = cleaned.isdecimal() and value.count(".") <= 1
    if not (looks_numeric or cleaned in ("inf", "nan")):
        return False
    try:
        # The clean-up above is lossy (repeated signs, misplaced underscores or
        # dots are erased), so let the real parser have the final word.
        float(value)
    except ValueError:
        return False
    return True
[docs]
def coerce_bool(value: str) -> bool:
    """Turn ``value`` into a boolean using :func:`~.is_true` and :func:`~.is_false`.

    :func:`~.is_true` is consulted first. A ``ValueError`` is raised when neither
    of the two checks recognises the value.
    """
    for recognises, outcome in ((is_true, True), (is_false, False)):
        if recognises(value):
            return outcome
    raise ValueError(f"{value!r} cannot be converted to boolean")
[docs]
def coerce_scalar(value: str) -> Scalar:
    """Try to convert the given string to a proper "scalar" type (e.g. integer, float,
    bool, ...) with a direct TOML equivalent.

    The value is stripped and then checked, in order, as an integer
    (:meth:`str.isdecimal`), a float (:func:`~.is_float`) and a boolean
    (:func:`~.is_true`/:func:`~.is_false`).
    If the conversion is unknown or not possible, it will return the same input value
    (as a stripped string) instead of raising an exception.

    .. note:: This function "guesses" the value type based on heuristics,
       therefore there is no guarantee the output has the same type as
       intended by the original author.

    .. note:: Currently date/time-related types are not supported.
    """
    value = value.strip()
    if value.isdecimal():
        return int(value)
    # NOTE(review): signed or underscore-separated integers (e.g. "-5", "1_000") do
    # not satisfy `isdecimal` and are therefore handled by the float branch below.
    if is_float(value):
        try:
            return float(value)
        except ValueError:
            # `is_float` is a heuristic: if the text only resembles a number,
            # honour the documented fallback instead of crashing.
            pass
    if is_true(value):
        return True
    if is_false(value):
        return False
    # TODO: date/time values are not recognised yet (one option would be trying
    # `datetime.fromisoformat(value)` and ignoring `ValueError`).
    return value
[docs]
def kebab_case(field: str) -> str:
    """Lower-case ``field`` and turn every underscore into a hyphen."""
    lowered = field.lower()
    return "-".join(lowered.split("_"))
[docs]
def deprecated(
    name: str, fn: TF = noop, instead: str = ""  # type: ignore[assignment]
) -> TF:
    """Wrap the ``fn`` transformation so that each call emits a deprecation warning.

    ``name`` identifies what is deprecated in the message; when ``instead`` is
    given, the message also recommends it as the replacement.
    The wrapper forwards all arguments to ``fn`` and returns its result.
    """
    extra = ""
    if instead:
        extra = f". Use {instead!r} instead"
    # All the pieces are plain strings, so the message can be built just once.
    msg = f"{name!r} is deprecated{extra}"

    @wraps(fn)
    def _warn_then_call(*args, **kwargs):
        # stacklevel=2: report the warning at the place that called the wrapper
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return fn(*args, **kwargs)

    return cast(TF, _warn_then_call)
# ---- Complex value processors ----
# These functions return an intermediate representation of the value,
# that needs `apply` to be added to a container
# Typing-only overloads of ``split_comment``.
# NOTE(review): the runtime implementation of ``split_comment`` is not visible in
# this section of the file -- confirm it is defined before the function is called.
# 1) No ``coerce_fn``: the result is typed as ``Commented[str]``.
@overload
def split_comment(value: str, *, comment_prefixes=CP) -> Commented[str]: ...
# 2) With ``coerce_fn``: the result is typed after the return type of ``coerce_fn``.
@overload
def split_comment(
value: str, coerce_fn: CoerceFn[T], comment_prefixes=CP
) -> Commented[T]: ...
[docs]
def split_scalar(value: str, *, comment_prefixes=CP) -> Commented[Scalar]:
    """Combination of :func:`~.split_comment` and :func:`~.coerce_scalar`.

    ``value`` is forwarded to :func:`~.split_comment` with :func:`~.coerce_scalar`
    as the coercion function and the given ``comment_prefixes``.
    """
    commented = split_comment(value, coerce_scalar, comment_prefixes)
    return commented
# Typing-only overloads of ``split_list``: they tie the item type of the returned
# ``CommentedList`` to the ``coerce_fn`` argument.
# 1) ``coerce_fn`` omitted: items are typed as ``str``.
@overload
def split_list(
value: str,
sep: str = ",",
*,
subsplit_dangling=True,
comment_prefixes=CP,
force_multiline=False,
) -> CommentedList[str]: ...
# 2) ``coerce_fn`` given as keyword: items are typed after its return type.
@overload
def split_list(
value: str,
sep: str = ",",
*,
coerce_fn: CoerceFn[T],
subsplit_dangling=True,
comment_prefixes=CP,
force_multiline=False,
) -> CommentedList[T]: ...
# 3) ``coerce_fn`` given positionally (``sep`` must then be explicit as well).
@overload
def split_list(
value: str,
sep: str,
coerce_fn: CoerceFn[T],
subsplit_dangling=True,
comment_prefixes=CP,
force_multiline=False,
) -> CommentedList[T]: ...
[docs]
def split_list(
    value,
    sep=",",
    coerce_fn=noop,
    subsplit_dangling=True,
    comment_prefixes=CP,
    force_multiline=False,
):
    """Value encoded as a (potentially) dangling list of values separated by ``sep``.

    The (stripped) value is first broken into lines with :func:`str.splitlines`
    (dangling list). Each line is then handed to :func:`~.split_comment` together
    with a function that splits the line by ``sep``, discards empty items and
    applies ``coerce_fn`` to each remaining (stripped) item.

    When ``subsplit_dangling=False`` and the value either spans more than one line
    or ``force_multiline=True``, lines are not split by ``sep`` (each line is a
    single item).

    Comment prefixes that contain ``sep`` are ignored.
    Values that are not strings are returned unchanged.
    """
    if not isinstance(value, str):
        return value

    usable_prefixes = [prefix for prefix in comment_prefixes if sep not in prefix]
    lines = value.strip().splitlines()

    keep_lines_whole = not subsplit_dangling and (len(lines) > 1 or force_multiline)
    # A separator ending in a line break can never occur inside a single line,
    # which effectively disables the per-line split.
    item_sep = sep + "\n" if keep_lines_whole else sep

    def _items(line: str) -> list:
        chunks = line.split(item_sep)
        return [coerce_fn(chunk.strip()) for chunk in chunks if chunk]

    entries = [split_comment(line, _items, usable_prefixes) for line in lines]
    return CommentedList(entries)
# Typing-only overloads of ``split_kv_pairs``: they tie the value type of the
# returned ``CommentedKV`` to the ``coerce_fn`` argument.
# 1) ``coerce_fn`` omitted: values are typed as ``str``.
@overload
def split_kv_pairs(
value: str,
key_sep: str = "=",
*,
pair_sep=",",
subsplit_dangling=True,
comment_prefixes=CP,
) -> CommentedKV[str]: ...
# 2) ``coerce_fn`` given as keyword: values are typed after its return type.
@overload
def split_kv_pairs(
value: str,
key_sep: str = "=",
*,
coerce_fn: CoerceFn[T],
pair_sep=",",
subsplit_dangling=True,
comment_prefixes=CP,
) -> CommentedKV[T]: ...
# 3) ``coerce_fn`` given positionally (``key_sep`` must then be explicit as well).
@overload
def split_kv_pairs(
value: str,
key_sep: str,
coerce_fn: CoerceFn[T],
pair_sep=",",
subsplit_dangling=True,
comment_prefixes=CP,
) -> CommentedKV[T]: ...
[docs]
def split_kv_pairs(
    value,
    key_sep="=",
    coerce_fn=noop,
    pair_sep=",",
    subsplit_dangling=True,
    comment_prefixes=CP,
):
    """Value encoded as a (potentially) dangling list of key-value pairs.

    This function will first try to split the value by lines (dangling list) using
    :func:`str.splitlines`. Then, if ``subsplit_dangling=True``, it will split each line
    using ``pair_sep``. As a result a list of key-value pairs is obtained.
    For each item in this list, the key is separated from the value by ``key_sep``
    (only the first occurrence counts); items that do not contain ``key_sep`` are
    skipped. ``coerce_fn`` is used to convert the value in each pair.

    Comment prefixes that contain ``key_sep`` or ``pair_sep`` are ignored.
    Values that are not strings are returned unchanged.
    """
    if not isinstance(value, str):
        # Same guard used by `split_list`: there is nothing to split in a value
        # that is not a string, so hand it back instead of failing on `.strip()`.
        return value

    prefixes = [p for p in comment_prefixes if key_sep not in p and pair_sep not in p]
    values = value.strip().splitlines()
    if not subsplit_dangling and len(values) > 1:
        pair_sep += "\n"  # force a pattern that cannot be found in a split line

    def _split_kv(line: str) -> List[KV]:
        pairs = (
            item.split(key_sep, maxsplit=1)
            for item in line.strip().split(pair_sep)
            if key_sep in item
        )
        return [(k.strip(), coerce_fn(v.strip())) for k, v in pairs]

    return CommentedKV([split_comment(v, _split_kv, prefixes) for v in values])
# ---- Public Helpers ----
[docs]
def remove_prefixes(text: str, prefixes: Sequence[str]):
    """Strip ``text`` and drop the first of ``prefixes`` that it starts with.

    Prefixes are tried in the given order and empty ones are skipped. Only one
    prefix is removed, and the remaining text is stripped again. When no prefix
    matches, the stripped text is returned.
    """
    stripped = text.strip()
    candidates = (p for p in prefixes if p and stripped.startswith(p))
    found = next(candidates, None)
    if found is None:
        return stripped
    return stripped[len(found):].strip()
[docs]
def apply(x, fn):
    """Call ``fn`` with ``x`` and return the result.

    The value comes first and the function second, which makes this helper
    suitable for reducing over a list of functions.
    """
    return fn(x)
# Typing-only overloads of ``pipe``: the output type of each function is the input
# type of the next one, and the composition maps the first input type to the last
# output type.
# 2 functions
@overload
def pipe(fn1: FN[S, T], fn2: FN[T, U]) -> FN[S, U]: ...
# 3 functions
@overload
def pipe(fn1: FN[S, T], fn2: FN[T, U], fn3: FN[U, V]) -> FN[S, V]: ...
# 4 functions
@overload
def pipe(fn1: FN[S, T], fn2: FN[T, U], fn3: FN[U, V], fn4: FN[V, X]) -> FN[S, X]: ...
# 5 functions
@overload
def pipe(
fn1: FN[S, T], fn2: FN[T, U], fn3: FN[U, V], fn4: FN[V, X], fn5: FN[X, Y]
) -> FN[S, Y]: ...
# More than 5 functions: every extra function is typed as mapping ``Y`` to ``Y``.
@overload
def pipe(
fn1: FN[S, T],
fn2: FN[T, U],
fn3: FN[U, V],
fn4: FN[V, X],
fn5: FN[X, Y],
*fn: FN[Y, Y],
) -> FN[S, Y]: ...
[docs]
def pipe(*fns):
    """Compose 1-argument functions respecting the sequence they should be applied:

    .. code-block:: python

        pipe(fn1, fn2, fn3, ..., fnN)(x) == fnN(...(fn3(fn2(fn1(x)))))
    """

    def _composed(x):
        # Fold from left to right: each function receives the previous result.
        return reduce(apply, fns, x)

    return _composed
# ---- Private Helpers ----
def _line_remove_comment(line: str, comment_mark="#") -> Optional[str]:
"""
>>> print(_line_remove_comment(" # hello world"))
None
>>> _line_remove_comment(" abc # hello world")
' abc'
>>> _line_remove_comment(" ")
' '
>>> _line_remove_comment("")
''
"""
if comment_mark not in line:
# Keep empty lines intentionally added by the user
return line
if line.strip().startswith(comment_mark):
# Remove empty lines that were just inserted as a replacement for comments
return None
return line.partition(comment_mark)[0].rstrip()
def _strip_prefix(msg: Optional[str], prefixes: Sequence[str] = CP) -> Optional[str]:
    """Apply :func:`remove_prefixes` to ``msg``; ``None`` or empty text yield ``None``."""
    return remove_prefixes(msg, prefixes) if msg else None