dlt.destinations.impl.filesystem.filesystem
FilesystemLoadJob Objects
class FilesystemLoadJob(RunnableLoadJob)
make_remote_path
def make_remote_path() -> str
Returns the path on the remote filesystem to which the file is copied, without the scheme. For the local filesystem a native path is used
make_remote_url
def make_remote_url() -> str
Returns path on a remote filesystem as a full url including scheme.
FilesystemClient Objects
class FilesystemClient(FSClientBase, WithSqlClient, JobClientBase,
WithStagingDataset, WithStateSync, SupportsOpenTables,
WithTableReflection)
storage_versions
@property
def storage_versions() -> Tuple[int, int]
Returns cached storage versions, loading them once from the filesystem if not already cached
init_file_path
@property
def init_file_path() -> str
Returns the path to the init file for the current dataset
dataset_path
@property
def dataset_path() -> str
A path within a bucket to tables in a dataset. NOTE: dataset_name changes if with_staging_dataset is active
migrate_storage
def migrate_storage(from_version: int, to_version: int) -> None
Migrate storage from one version to another
get_storage_versions
def get_storage_versions() -> Tuple[int, int]
Returns initial and current storage versions.
- If the init file is empty, we assume legacy version 1 where .gz extension was not added to compressed files.
- For any other non-empty content we parse it as json and expect the version key to have a supported value.
get_storage_tables
def get_storage_tables(
table_names: Iterable[str]
) -> Iterable[Tuple[str, TTableSchemaColumns]]
Yield (table_name, column_schemas) pairs for tables that have files in storage.
For Delta and Iceberg tables, the columns present in the actual table metadata are returned. For tables using regular file formats, the column schemas come from the dlt schema instead, since their real schema cannot be reflected directly.
truncate_tables
def truncate_tables(table_names: List[str]) -> None
Truncate a set of regular tables with given table_names
get_table_dir
def get_table_dir(table_name: str, remote: bool = False) -> str
Returns a directory containing table files, ending with separator. Note that many tables can share the same table dir
get_table_prefix
def get_table_prefix(table_name: str) -> str
For table prefixes that are folders, trailing separator will be preserved
get_table_dirs
def get_table_dirs(table_names: Iterable[str],
remote: bool = False) -> List[str]
Gets directories where table data is stored.
list_table_files
def list_table_files(table_name: str) -> List[str]
Gets a list of files associated with one table
list_files_with_prefixes
def list_files_with_prefixes(table_dir: str, prefixes: List[str]) -> List[str]
Returns all files in a directory that match the given prefixes
make_remote_url
def make_remote_url(remote_path: str) -> str
Returns the URI on the remote filesystem to which the file is copied
get_stored_schema
def get_stored_schema(schema_name: str = None) -> Optional[StorageSchemaInfo]
Retrieves newest schema from destination storage
load_open_table
def load_open_table(table_format: TTableFormat, table_name: str,
**kwargs: Any) -> Any
Locates, loads and returns native table client for table table_name in delta or iceberg formats
get_open_table_catalog
def get_open_table_catalog(table_format: TTableFormat,
catalog_name: str = None) -> Any
Gets a native catalog named catalog_name for tables with format table_format
Returns: currently pyiceberg Catalog is supported
get_open_table_location
def get_open_table_location(table_format: TTableFormat,
table_name: str) -> str
All tables have a location, including those in "native" table format. The native format in the case of the filesystem is a set of parquet/csv/jsonl files, where a table may be placed in a separate folder or share a common prefix defined in the layout. Locations of native tables are normalized to include a trailing separator if the path is a "folder" (includes buckets). Note: the location is a fully formed url