"""Implements classes for generating data by schema."""
import csv
import json
import pickle
import re
import typing as t
from mimesis.exceptions import FieldError, FieldsetError, SchemaError
from mimesis.locales import Locale
from mimesis.providers.base import BaseProvider
from mimesis.providers.generic import Generic
from mimesis.types import (
JSON,
CallableSchema,
FieldCache,
Key,
MissingSeed,
Seed,
)
__all__ = ["BaseField", "Field", "Fieldset", "Schema"]
class BaseField:
    """Base class for fields that resolve provider methods by name.

    Holds a :class:`Generic` instance and a cache which maps normalized
    field names to the callables they resolve to.
    """

    def __init__(
        self,
        locale: Locale = Locale.DEFAULT,
        seed: Seed = MissingSeed,
        providers: t.Optional[t.Sequence[t.Any]] = None,
    ) -> None:
        """Initialize field.

        :param locale: Locale
        :param seed: Seed for random.
        :param providers: Extra providers to register on the generic provider.
        """
        self._gen = Generic(locale, seed)

        if providers:
            self._gen.add_providers(*providers)

        # Maps a normalized field name to the resolved callable.
        self._cache: FieldCache = {}

    def reseed(self, seed: Seed = MissingSeed) -> None:
        """Reseed the random generator.

        :param seed: Seed for random.
        """
        self._gen.reseed(seed)

    def _explicit_lookup(self, name: str) -> t.Any:
        """An explicit method lookup.

        This method is called when the field is
        defined explicitly, like this: ``provider.method``

        :param name: The field name.
        :return: Callable object.
        :raise FieldError: When field is invalid.
        """
        provider_name, method_name = name.split(".", 1)
        try:
            provider = getattr(self._gen, provider_name)
            return getattr(provider, method_name)
        except AttributeError as err:
            # Keep the original lookup failure attached as the cause.
            raise FieldError(name) from err

    def _fuzzy_lookup(self, name: str) -> t.Any:
        """A fuzzy method lookup.

        This method is called when the field definition
        is fuzzy, like this: ``method``

        The first provider (in ``dir()`` order of the generic provider)
        which exposes an attribute called ``name`` wins.

        :param name: The field name.
        :return: Callable object.
        :raise FieldError: When field is invalid.
        """
        for attr_name in dir(self._gen):
            candidate = getattr(self._gen, attr_name)
            if isinstance(candidate, BaseProvider) and name in dir(candidate):
                return getattr(candidate, name)

        raise FieldError(name)

    def _lookup_method(self, name: str) -> t.Any:
        """Lookup method by the field name.

        The delimiters ``/``, ``:`` and whitespace are treated as ``.``.
        Resolved callables are cached by their normalized name.

        :param name: The field name.
        :return: Callable object.
        :raise FieldError: When field is invalid.
        """
        # Support additional delimiters
        name = re.sub(r"[/:\s]", ".", name)

        # Only ``method`` and ``provider.method`` are valid forms.
        if name.count(".") > 1:
            raise FieldError(name)

        if name not in self._cache:
            if "." not in name:
                method = self._fuzzy_lookup(name)
            else:
                method = self._explicit_lookup(name)
            self._cache[name] = method

        return self._cache[name]

    def perform(
        self,
        name: t.Optional[str] = None,
        key: Key = None,
        **kwargs: t.Any,
    ) -> t.Any:
        """Resolve the field by its name and return the generated value.

        This is the method which ``__call__`` of the subclasses delegates to.

        :param name: Name of the method, either ``method`` or
            ``provider.method`` (``/``, ``:`` and whitespace are accepted
            as delimiters too).
        :param key: Optional callable applied to the generated value;
            its return value becomes the final result.
        :param kwargs: Keyword arguments passed to the resolved method.
        :return: The generated value.
        :raise FieldError: When the name is missing or cannot be resolved.
        """
        if name is None:
            raise FieldError(name)

        result = self._lookup_method(name)(**kwargs)

        if key is not None and callable(key):
            return key(result)

        return result

    def __str__(self) -> str:
        """Return the class name followed by the locale in angle brackets."""
        return f"{self.__class__.__name__} <{self._gen.locale}>"
class Field(BaseField):
    """Greedy field.

    A field which is evaluated immediately when it is called.

    .. warning::

        There is never a need to instantiate a **field** inside a loop.

        If you are doing this:

        >>> for i in range(1000):
        ...     field = Field()

        you are doing it **wrong**! It is a terrible idea that will
        lead to a memory leak.

        Forewarned is forearmed.

    Here is a usage example:

        >>> _ = Field()
        >>> _('username')
        Dogtag_1836
    """

    def __call__(self, *args: t.Any, **kwargs: t.Any) -> t.Any:
        """Evaluate the field by forwarding all arguments to ``perform``."""
        value = self.perform(*args, **kwargs)
        return value
class Fieldset(BaseField):
    """Greedy fieldset (evaluates immediately).

    Behaves like a field, except that a list of values is returned.

    Here is a usage example:

        >>> fieldset = Fieldset(i=100)
        >>> fieldset('username')
        ['pot_1821', 'vhs_1915', ..., 'reviewed_1849']

    The number of iterations may also be given per call, by passing the
    **i** keyword argument to the callable instance of the fieldset:

        >>> fieldset = Fieldset()
        >>> fieldset('username', i=2)
        ['pot_1821', 'vhs_1915']

    When **i** is not specified, a reasonable default is used — **10**.

    See "Field vs Fieldset" section of documentation for more details.

    :cvar fieldset_default_iterations: Default iterations. Default is **10**.
    :cvar fieldset_iterations_kwarg: Keyword argument for iterations. Default is **i**.
    """

    fieldset_default_iterations: int = 10
    fieldset_iterations_kwarg: str = "i"

    def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
        """Initialize fieldset.

        The iterations keyword argument (**i** by default) is consumed here
        and stored as the instance-wide number of iterations; everything
        else is handed over to the parent initializer.

        The name of that keyword argument can be changed by overriding
        the **fieldset_iterations_kwarg** attribute of this class.
        """
        kwarg_name = self.fieldset_iterations_kwarg
        if kwarg_name in kwargs:
            self._iterations = kwargs.pop(kwarg_name)
        else:
            self._iterations = self.fieldset_default_iterations
        super().__init__(*args, **kwargs)

    def __call__(self, *args: t.Any, **kwargs: t.Any) -> t.List[t.Any]:
        """Perform fieldset.

        :param args: Arguments for field.
        :param kwargs: Keyword arguments for field.
        :raises FieldsetError: If iterations less than 1.
        :return: List of values.
        """
        lower_bound = 1
        # A per-call value takes precedence over the instance-wide one.
        count = kwargs.pop(self.fieldset_iterations_kwarg, self._iterations)

        if count < lower_bound:
            raise FieldsetError()

        values: t.List[t.Any] = []
        for _ in range(count):
            values.append(self.perform(*args, **kwargs))
        return values
class Schema:
    """Class which returns a list of filled schemas.

    Instances are also iterators: iterating over a schema lazily yields
    ``iterations`` filled schemas, one per step.
    """

    __slots__ = (
        "_count",
        "_schema",
        "iterations",
        "_min_iterations",
    )

    def __init__(self, schema: CallableSchema, iterations: int = 10) -> None:
        """Initialize schema.

        :param schema: A schema (must be a callable object).
        :param iterations: Number of iterations. The default value is 10.
        :raises SchemaError: If the schema is falsy or is not callable.
        :raises ValueError: If iterations is less than 1.
        """
        if schema and callable(schema):  # type: ignore[truthy-function]
            self._schema = schema
            self._count = 0
            self._min_iterations = 1
            if iterations >= self._min_iterations:
                self.iterations = iterations
            else:
                raise ValueError(
                    "Iterations must be greater than or equal to "
                    f"{self._min_iterations}"
                )
        else:
            # This is just a better error message
            raise SchemaError()

    def to_csv(self, file_path: str, **kwargs: t.Any) -> None:
        """Export a schema as a CSV file.

        The keys of the first generated item are used as the CSV header.

        :param file_path: File path.
        :param kwargs: The keyword arguments for :py:class:`csv.DictWriter` class.

        *New in version 5.3.0*
        """
        # Generate everything before touching the file, so that a failing
        # schema does not leave a truncated file behind.
        data = self.create()
        fieldnames = list(data[0])

        with open(file_path, "w", encoding="utf-8", newline="") as fp:
            dict_writer = csv.DictWriter(fp, fieldnames, **kwargs)
            dict_writer.writeheader()
            dict_writer.writerows(data)

    def to_json(self, file_path: str, **kwargs: t.Any) -> None:
        """Export a schema as a JSON file.

        :param file_path: File path.
        :param kwargs: Extra keyword arguments for :py:func:`json.dump` function.

        *New in version 5.3.0*
        """
        # Generate first: opening with "w" truncates the target immediately.
        data = self.create()

        with open(file_path, "w", encoding="utf-8") as fp:
            json.dump(data, fp, **kwargs)

    def to_pickle(self, file_path: str, **kwargs: t.Any) -> None:
        """Export a schema as the pickled representation of the object to the file.

        :param file_path: File path.
        :param kwargs: Extra keyword arguments for :py:func:`pickle.dump` function.

        *New in version 5.3.0*
        """
        # Generate first: opening with "wb" truncates the target immediately.
        data = self.create()

        with open(file_path, "wb") as fp:
            pickle.dump(data, fp, **kwargs)

    def create(self) -> t.List[JSON]:
        """Creates a list of fulfilled schemas.

        .. note::
            This method evaluates immediately, so be careful when creating
            large datasets, otherwise you're risking running out of memory.

            If you need a lazy version, iterate over the schema
            instance itself instead.

        :return: List of fulfilled schemas.
        """
        return [self._schema() for _ in range(self.iterations)]

    def __next__(self) -> JSON:
        """Return the next item from the iterator.

        :raises StopIteration: After ``iterations`` items have been produced.
        """
        if self._count < self.iterations:
            self._count += 1
            return self._schema()
        raise StopIteration

    def __iter__(self) -> "Schema":
        """Return the iterator object itself.

        The internal counter is reset, so every new iteration
        starts from the beginning.
        """
        self._count = 0
        return self