DataFrame
Package: flyte.io
This is the user facing DataFrame class. Please don’t confuse it with the literals.StructuredDataset class (that is just a model, a Python class representation of the protobuf).
class DataFrame(
data: Any,
)
Create a new model by parsing and validating input data from keyword arguments.
Raises [ValidationError][pydantic_core.ValidationError] if the input data cannot be
validated to form a valid model.
self is explicitly positional-only to allow self as a field name.
| Parameter | Type | Description |
|---|---|---|
data |
Any |
Methods
| Method | Description |
|---|---|
all() |
|
column_names() |
|
columns() |
|
construct() |
|
copy() |
Returns a copy of the model. |
deserialize_dataframe() |
|
dict() |
|
from_df() |
Wrapper to create a DataFrame from a dataframe. |
from_existing_remote() |
Create a DataFrame reference from an existing remote dataframe. |
from_orm() |
|
iter() |
|
json() |
|
model_construct() |
Creates a new instance of the Model class with validated data. |
model_copy() |
Returns a copy of the model. |
model_dump() |
Generate a dictionary representation of the model, optionally specifying which fields to include or exclude. |
model_dump_json() |
Generates a JSON representation of the model using Pydantic’s to_json method. |
model_json_schema() |
Generates a JSON schema for a model class. |
model_parametrized_name() |
Compute the class name for parametrizations of generic classes. |
model_post_init() |
This function is meant to behave like a BaseModel method to initialise private attributes. |
model_rebuild() |
Try to rebuild the pydantic-core schema for the model. |
model_validate() |
Validate a pydantic model instance. |
model_validate_json() |
Validate the given JSON data against the Pydantic model. |
model_validate_strings() |
Validate the given object with string data against the Pydantic model. |
open() |
Load the handler if needed. |
parse_file() |
|
parse_obj() |
|
parse_raw() |
|
schema() |
|
schema_json() |
|
serialize_dataframe() |
|
set_literal() |
A public wrapper method to set the DataFrame Literal. |
update_forward_refs() |
|
validate() |
all()
def all()
column_names()
def column_names()
columns()
def columns()
construct()
def construct(
_fields_set: set[str] | None,
values: Any,
) -> Self
| Parameter | Type | Description |
|---|---|---|
_fields_set |
set[str] | None |
|
values |
Any |
copy()
def copy(
include: AbstractSetIntStr | MappingIntStrAny | None,
exclude: AbstractSetIntStr | MappingIntStrAny | None,
update: Dict[str, Any] | None,
deep: bool,
) -> Self
Returns a copy of the model.
> [!WARNING] Deprecated
> This method is now deprecated; use model_copy instead.
If you need include or exclude, use:
```python
data = self.model_dump(include=include, exclude=exclude, round_trip=True)
data = {**data, **(update or {})}
copied = self.model_validate(data)
```
| Parameter | Type | Description |
|---|---|---|
include |
AbstractSetIntStr | MappingIntStrAny | None |
Optional set or mapping specifying which fields to include in the copied model. |
exclude |
AbstractSetIntStr | MappingIntStrAny | None |
Optional set or mapping specifying which fields to exclude in the copied model. |
update |
Dict[str, Any] | None |
Optional dictionary of field-value pairs to override field values in the copied model. |
deep |
bool |
If True, the values of fields that are Pydantic models will be deep-copied. |
deserialize_dataframe()
def deserialize_dataframe(
info,
) -> DataFrame
| Parameter | Type | Description |
|---|---|---|
info |
dict()
def dict(
include: IncEx | None,
exclude: IncEx | None,
by_alias: bool,
exclude_unset: bool,
exclude_defaults: bool,
exclude_none: bool,
) -> Dict[str, Any]
| Parameter | Type | Description |
|---|---|---|
include |
IncEx | None |
|
exclude |
IncEx | None |
|
by_alias |
bool |
|
exclude_unset |
bool |
|
exclude_defaults |
bool |
|
exclude_none |
bool |
from_df()
def from_df(
val: typing.Optional[typing.Any],
uri: typing.Optional[str],
) -> DataFrame
Wrapper to create a DataFrame from a dataframe. It is implemented as a wrapper, rather than as a full translation that invokes the type engine and the encoders, because the task’s type signature carries too much information that we don’t want the user to have to replicate.
| Parameter | Type | Description |
|---|---|---|
val |
typing.Optional[typing.Any] |
|
uri |
typing.Optional[str] |
from_existing_remote()
def from_existing_remote(
remote_path: str,
format: typing.Optional[str],
kwargs,
) -> 'DataFrame'
Create a DataFrame reference from an existing remote dataframe.
| Parameter | Type | Description |
|---|---|---|
remote_path |
str |
The remote path to the existing dataframe |
format |
typing.Optional[str] |
Format of the stored dataframe |
kwargs |
**kwargs |
from_orm()
def from_orm(
obj: Any,
) -> Self
| Parameter | Type | Description |
|---|---|---|
obj |
Any |
iter()
def iter()
json()
def json(
include: IncEx | None,
exclude: IncEx | None,
by_alias: bool,
exclude_unset: bool,
exclude_defaults: bool,
exclude_none: bool,
encoder: Callable[[Any], Any] | None,
models_as_dict: bool,
dumps_kwargs: Any,
) -> str
| Parameter | Type | Description |
|---|---|---|
include |
IncEx | None |
|
exclude |
IncEx | None |
|
by_alias |
bool |
|
exclude_unset |
bool |
|
exclude_defaults |
bool |
|
exclude_none |
bool |
|
encoder |
Callable[[Any], Any] | None |
|
models_as_dict |
bool |
|
dumps_kwargs |
Any |
model_construct()
def model_construct(
_fields_set: set[str] | None,
values: Any,
) -> Self
Creates a new instance of the Model class with validated data.
Creates a new model setting __dict__ and __pydantic_fields_set__ from trusted or pre-validated data.
Default values are respected, but no other validation is performed.
> [!NOTE]
> model_construct() generally respects the model_config.extra setting on the provided model.
> That is, if model_config.extra == 'allow', then all extra passed values are added to the model instance’s __dict__
> and __pydantic_extra__ fields. If model_config.extra == 'ignore' (the default), then all extra passed values are ignored.
> Because no validation is performed with a call to model_construct(), having model_config.extra == 'forbid' does not result in
> an error if extra values are passed, but they will be ignored.
| Parameter | Type | Description |
|---|---|---|
_fields_set |
set[str] | None |
A set of field names that were originally explicitly set during instantiation. If provided, this is directly used for the [model_fields_set][pydantic.BaseModel.model_fields_set] attribute. Otherwise, the field names from the values argument will be used. |
values |
Any |
Trusted or pre-validated data dictionary. |
model_copy()
def model_copy(
update: Mapping[str, Any] | None,
deep: bool,
) -> Self
Usage documentation: see “model_copy” in the Pydantic documentation.
Returns a copy of the model.
> [!NOTE]
> The underlying instance’s [__dict__][object.dict] attribute is copied. This
> might have unexpected side effects if you store anything in it, on top of the model
> fields (e.g. the value of [cached properties][functools.cached_property]).
| Parameter | Type | Description |
|---|---|---|
update |
Mapping[str, Any] | None |
|
deep |
bool |
Set to True to make a deep copy of the model. |
model_dump()
def model_dump(
mode: Literal['json', 'python'] | str,
include: IncEx | None,
exclude: IncEx | None,
context: Any | None,
by_alias: bool | None,
exclude_unset: bool,
exclude_defaults: bool,
exclude_none: bool,
exclude_computed_fields: bool,
round_trip: bool,
warnings: bool | Literal['none', 'warn', 'error'],
fallback: Callable[[Any], Any] | None,
serialize_as_any: bool,
) -> dict[str, Any]
Usage documentation: see “model_dump” in the Pydantic documentation.
Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.
| Parameter | Type | Description |
|---|---|---|
mode |
Literal['json', 'python'] | str |
The mode in which to_python should run. If mode is ‘json’, the output will only contain JSON serializable types. If mode is ‘python’, the output may contain non-JSON-serializable Python objects. |
include |
IncEx | None |
A set of fields to include in the output. |
exclude |
IncEx | None |
A set of fields to exclude from the output. |
context |
Any | None |
Additional context to pass to the serializer. |
by_alias |
bool | None |
Whether to use the field’s alias in the dictionary key if defined. |
exclude_unset |
bool |
Whether to exclude fields that have not been explicitly set. |
exclude_defaults |
bool |
Whether to exclude fields that are set to their default value. |
exclude_none |
bool |
Whether to exclude fields that have a value of None. |
exclude_computed_fields |
bool |
Whether to exclude computed fields. While this can be useful for round-tripping, it is usually recommended to use the dedicated round_trip parameter instead. |
round_trip |
bool |
If True, dumped values should be valid as input for non-idempotent types such as Json[T]. |
warnings |
bool | Literal['none', 'warn', 'error'] |
How to handle serialization errors. False/“none” ignores them, True/“warn” logs errors, “error” raises a [PydanticSerializationError][pydantic_core.PydanticSerializationError]. |
fallback |
Callable[[Any], Any] | None |
A function to call when an unknown value is encountered. If not provided, a [PydanticSerializationError][pydantic_core.PydanticSerializationError] error is raised. |
serialize_as_any |
bool |
Whether to serialize fields with duck-typing serialization behavior. |
model_dump_json()
def model_dump_json(
indent: int | None,
ensure_ascii: bool,
include: IncEx | None,
exclude: IncEx | None,
context: Any | None,
by_alias: bool | None,
exclude_unset: bool,
exclude_defaults: bool,
exclude_none: bool,
exclude_computed_fields: bool,
round_trip: bool,
warnings: bool | Literal['none', 'warn', 'error'],
fallback: Callable[[Any], Any] | None,
serialize_as_any: bool,
) -> str
Usage documentation: see “model_dump_json” in the Pydantic documentation.
Generates a JSON representation of the model using Pydantic’s to_json method.
| Parameter | Type | Description |
|---|---|---|
indent |
int | None |
Indentation to use in the JSON output. If None is passed, the output will be compact. |
ensure_ascii |
bool |
If True, the output is guaranteed to have all incoming non-ASCII characters escaped. If False (the default), these characters will be output as-is. |
include |
IncEx | None |
Field(s) to include in the JSON output. |
exclude |
IncEx | None |
Field(s) to exclude from the JSON output. |
context |
Any | None |
Additional context to pass to the serializer. |
by_alias |
bool | None |
Whether to serialize using field aliases. |
exclude_unset |
bool |
Whether to exclude fields that have not been explicitly set. |
exclude_defaults |
bool |
Whether to exclude fields that are set to their default value. |
exclude_none |
bool |
Whether to exclude fields that have a value of None. |
exclude_computed_fields |
bool |
Whether to exclude computed fields. While this can be useful for round-tripping, it is usually recommended to use the dedicated round_trip parameter instead. |
round_trip |
bool |
If True, dumped values should be valid as input for non-idempotent types such as Json[T]. |
warnings |
bool | Literal['none', 'warn', 'error'] |
How to handle serialization errors. False/“none” ignores them, True/“warn” logs errors, “error” raises a [PydanticSerializationError][pydantic_core.PydanticSerializationError]. |
fallback |
Callable[[Any], Any] | None |
A function to call when an unknown value is encountered. If not provided, a [PydanticSerializationError][pydantic_core.PydanticSerializationError] error is raised. |
serialize_as_any |
bool |
Whether to serialize fields with duck-typing serialization behavior. |
model_json_schema()
def model_json_schema(
by_alias: bool,
ref_template: str,
schema_generator: type[GenerateJsonSchema],
mode: JsonSchemaMode,
union_format: Literal['any_of', 'primitive_type_array'],
) -> dict[str, Any]
Generates a JSON schema for a model class.
| Parameter | Type | Description |
|---|---|---|
by_alias |
bool |
Whether to use attribute aliases or not. |
ref_template |
str |
The reference template. |
schema_generator |
type[GenerateJsonSchema] |
To override the logic used to generate the JSON schema, as a subclass of GenerateJsonSchema with your desired modifications. |
mode |
JsonSchemaMode |
The mode in which to generate the schema. |
union_format |
Literal['any_of', 'primitive_type_array'] |
The format to use when combining schemas from unions together. Can be one of: - 'any_of': Use the anyOf keyword to combine schemas (the default). - 'primitive_type_array': Use the type keyword as an array of strings, containing each type of the combination. If any of the schemas is not a primitive type (string, boolean, null, integer or number) or contains constraints/metadata, falls back to any_of. |
model_parametrized_name()
def model_parametrized_name(
params: tuple[type[Any], ...],
) -> str
Compute the class name for parametrizations of generic classes.
This method can be overridden to achieve a custom naming scheme for generic BaseModels.
| Parameter | Type | Description |
|---|---|---|
params |
tuple[type[Any], ...] |
Tuple of types of the class. Given a generic class Model with 2 type variables and a concrete model Model[str, int], the value (str, int) would be passed to params. |
model_post_init()
def model_post_init(
context: Any,
)
This function is meant to behave like a BaseModel method to initialise private attributes.
It takes context as an argument since that’s what pydantic-core passes when calling it.
| Parameter | Type | Description |
|---|---|---|
context |
Any |
The context. |
model_rebuild()
def model_rebuild(
force: bool,
raise_errors: bool,
_parent_namespace_depth: int,
_types_namespace: MappingNamespace | None,
) -> bool | None
Try to rebuild the pydantic-core schema for the model.
This may be necessary when one of the annotations is a ForwardRef which could not be resolved during the initial attempt to build the schema, and automatic rebuilding fails.
| Parameter | Type | Description |
|---|---|---|
force |
bool |
Whether to force the rebuilding of the model schema, defaults to False. |
raise_errors |
bool |
Whether to raise errors, defaults to True. |
_parent_namespace_depth |
int |
The depth level of the parent namespace, defaults to 2. |
_types_namespace |
MappingNamespace | None |
The types namespace, defaults to None. |
model_validate()
def model_validate(
obj: Any,
strict: bool | None,
extra: ExtraValues | None,
from_attributes: bool | None,
context: Any | None,
by_alias: bool | None,
by_name: bool | None,
) -> Self
Validate a pydantic model instance.
| Parameter | Type | Description |
|---|---|---|
obj |
Any |
The object to validate. |
strict |
bool | None |
Whether to enforce types strictly. |
extra |
ExtraValues | None |
Whether to ignore, allow, or forbid extra data during model validation. See the [extra configuration value][pydantic.ConfigDict.extra] for details. |
from_attributes |
bool | None |
Whether to extract data from object attributes. |
context |
Any | None |
Additional context to pass to the validator. |
by_alias |
bool | None |
Whether to use the field’s alias when validating against the provided input data. |
by_name |
bool | None |
Whether to use the field’s name when validating against the provided input data. |
model_validate_json()
def model_validate_json(
json_data: str | bytes | bytearray,
strict: bool | None,
extra: ExtraValues | None,
context: Any | None,
by_alias: bool | None,
by_name: bool | None,
) -> Self
Usage documentation: see “JSON Parsing” in the Pydantic documentation.
Validate the given JSON data against the Pydantic model.
| Parameter | Type | Description |
|---|---|---|
json_data |
str | bytes | bytearray |
The JSON data to validate. |
strict |
bool | None |
Whether to enforce types strictly. |
extra |
ExtraValues | None |
Whether to ignore, allow, or forbid extra data during model validation. See the [extra configuration value][pydantic.ConfigDict.extra] for details. |
context |
Any | None |
Extra variables to pass to the validator. |
by_alias |
bool | None |
Whether to use the field’s alias when validating against the provided input data. |
by_name |
bool | None |
Whether to use the field’s name when validating against the provided input data. |
model_validate_strings()
def model_validate_strings(
obj: Any,
strict: bool | None,
extra: ExtraValues | None,
context: Any | None,
by_alias: bool | None,
by_name: bool | None,
) -> Self
Validate the given object with string data against the Pydantic model.
| Parameter | Type | Description |
|---|---|---|
obj |
Any |
The object containing string data to validate. |
strict |
bool | None |
Whether to enforce types strictly. |
extra |
ExtraValues | None |
Whether to ignore, allow, or forbid extra data during model validation. See the [extra configuration value][pydantic.ConfigDict.extra] for details. |
context |
Any | None |
Extra variables to pass to the validator. |
by_alias |
bool | None |
Whether to use the field’s alias when validating against the provided input data. |
by_name |
bool | None |
Whether to use the field’s name when validating against the provided input data. |
open()
def open(
dataframe_type: Type[DF],
)
Load the handler if needed. For a use case like:
```python
@task
def t1(df: DataFrame):
    import pandas as pd
    df.open(pd.DataFrame).all()
```
pandas is imported inside the task, so the pandas handler won’t be loaded during deserialization in the type engine.
| Parameter | Type | Description |
|---|---|---|
dataframe_type |
Type[DF] |
parse_file()
def parse_file(
path: str | Path,
content_type: str | None,
encoding: str,
proto: DeprecatedParseProtocol | None,
allow_pickle: bool,
) -> Self
| Parameter | Type | Description |
|---|---|---|
path |
str | Path |
|
content_type |
str | None |
|
encoding |
str |
|
proto |
DeprecatedParseProtocol | None |
|
allow_pickle |
bool |
parse_obj()
def parse_obj(
obj: Any,
) -> Self
| Parameter | Type | Description |
|---|---|---|
obj |
Any |
parse_raw()
def parse_raw(
b: str | bytes,
content_type: str | None,
encoding: str,
proto: DeprecatedParseProtocol | None,
allow_pickle: bool,
) -> Self
| Parameter | Type | Description |
|---|---|---|
b |
str | bytes |
|
content_type |
str | None |
|
encoding |
str |
|
proto |
DeprecatedParseProtocol | None |
|
allow_pickle |
bool |
schema()
def schema(
by_alias: bool,
ref_template: str,
) -> Dict[str, Any]
| Parameter | Type | Description |
|---|---|---|
by_alias |
bool |
|
ref_template |
str |
schema_json()
def schema_json(
by_alias: bool,
ref_template: str,
dumps_kwargs: Any,
) -> str
| Parameter | Type | Description |
|---|---|---|
by_alias |
bool |
|
ref_template |
str |
|
dumps_kwargs |
Any |
serialize_dataframe()
def serialize_dataframe()
set_literal()
def set_literal(
expected: types_pb2.LiteralType,
)
A public wrapper method to set the DataFrame Literal.
This method provides external access to the internal _set_literal method.
| Parameter | Type | Description |
|---|---|---|
expected |
types_pb2.LiteralType |
update_forward_refs()
def update_forward_refs(
localns: Any,
)
| Parameter | Type | Description |
|---|---|---|
localns |
Any |
validate()
def validate(
value: Any,
) -> Self
| Parameter | Type | Description |
|---|---|---|
value |
Any |
Properties
| Property | Type | Description |
|---|---|---|
literal |
None |
|
metadata |
None |
|
model_extra |
None |
Get extra fields set during validation. Returns: A dictionary of extra fields, or None if config.extra is not set to "allow". |
model_fields_set |
None |
Returns the set of fields that have been explicitly set on this model instance. Returns: A set of strings representing the fields that have been set, i.e. that were not filled from defaults. |
val |
None |