GET_METADATA
返回 Parquet 文件的元数据。元数据包括行组的数量和大小、列名以及有关块和压缩的信息。元数据以 JSON 形式返回。
此函数检查一个文件。Parquet 数据通常跨越单个目录中的多个文件;选择一个。该函数不接受目录名称作为实参。
这是元函数。您必须在顶级 SELECT 语句中调用元函数。
行为类型
易变语法
GET_METADATA( 'filename' )
参数
filename
- Parquet 文件的名称。任何对 COPY 有效的路径都对该函数有效。此函数不对其他格式的文件进行操作。
特权
在 USER 可访问的存储位置上具有 READ 权限的超级用户或非超级用户(请参阅 GRANT(存储位置))。
示例
您必须使用单个文件而不是目录或 glob 调用此函数:
=> SELECT GET_METADATA('/data/emp-row.parquet');
GET_METADATA
----------------------------------------------------------------------------------------------------
schema:
required group field_id=-1 spark_schema {
optional int32 field_id=-1 employeeID;
optional group field_id=-1 personal {
optional binary field_id=-1 name (String);
optional group field_id=-1 address {
optional binary field_id=-1 street (String);
optional binary field_id=-1 city (String);
optional int32 field_id=-1 zipcode;
}
optional int32 field_id=-1 taxID;
}
optional binary field_id=-1 department (String);
}
data page version:
data page v1
metadata:
{
"FileName": "/data/emp-row.parquet",
"FileFormat": "Parquet",
"Version": "1.0",
"CreatedBy": "parquet-mr version 1.10.1 (build a89df8f9932b6ef6633d06069e50c9b7970bebd1)",
"TotalRows": "4",
"NumberOfRowGroups": "1",
"NumberOfRealColumns": "3",
"NumberOfColumns": "7",
"Columns": [
{ "Id": "0", "Name": "employeeID", "PhysicalType": "INT32", "ConvertedType": "NONE", "LogicalType": {"Type": "None"} },
{ "Id": "1", "Name": "personal.name", "PhysicalType": "BYTE_ARRAY", "ConvertedType": "UTF8", "LogicalType": {"Type": "String"} },
{ "Id": "2", "Name": "personal.address.street", "PhysicalType": "BYTE_ARRAY", "ConvertedType": "UTF8", "LogicalType": {"Type": "String"} },
{ "Id": "3", "Name": "personal.address.city", "PhysicalType": "BYTE_ARRAY", "ConvertedType": "UTF8", "LogicalType": {"Type": "String"} },
{ "Id": "4", "Name": "personal.address.zipcode", "PhysicalType": "INT32", "ConvertedType": "NONE", "LogicalType": {"Type": "None"} },
{ "Id": "5", "Name": "personal.taxID", "PhysicalType": "INT32", "ConvertedType": "NONE", "LogicalType": {"Type": "None"} },
{ "Id": "6", "Name": "department", "PhysicalType": "BYTE_ARRAY", "ConvertedType": "UTF8", "LogicalType": {"Type": "String"} }
],
"RowGroups": [
{
"Id": "0", "TotalBytes": "642", "TotalCompressedBytes": "0", "Rows": "4",
"ColumnChunks": [
{"Id": "0", "Values": "4", "StatsSet": "True", "Stats": {"NumNulls": "0", "DistinctValues": "0", "Max": "51513", "Min": "17103" },
"Compression": "SNAPPY", "Encodings": "PLAIN RLE BIT_PACKED ", "UncompressedSize": "67", "CompressedSize": "69" },
{"Id": "1", "Values": "4", "StatsSet": "True", "Stats": {"NumNulls": "0", "DistinctValues": "0", "Max": "Sheldon Cooper", "Min": "Howard Wolowitz" },
"Compression": "SNAPPY", "Encodings": "PLAIN RLE BIT_PACKED ", "UncompressedSize": "142", "CompressedSize": "145" },
{"Id": "2", "Values": "4", "StatsSet": "True", "Stats": {"NumNulls": "0", "DistinctValues": "0", "Max": "52 Broad St", "Min": "100 Main St Apt 4A" },
"Compression": "SNAPPY", "Encodings": "PLAIN RLE BIT_PACKED ", "UncompressedSize": "139", "CompressedSize": "123" },
{"Id": "3", "Values": "4", "StatsSet": "True", "Stats": {"NumNulls": "0", "DistinctValues": "0", "Max": "Pasadena", "Min": "Pasadena" },
"Compression": "SNAPPY", "Encodings": "RLE PLAIN_DICTIONARY BIT_PACKED ", "UncompressedSize": "95", "CompressedSize": "99" },
{"Id": "4", "Values": "4", "StatsSet": "True", "Stats": {"NumNulls": "0", "DistinctValues": "0", "Max": "91021", "Min": "91001" },
"Compression": "SNAPPY", "Encodings": "PLAIN RLE BIT_PACKED ", "UncompressedSize": "68", "CompressedSize": "70" },
{"Id": "5", "Values": "4", "StatsSet": "True", "Stats": {"NumNulls": "4", "DistinctValues": "0", "Max": "0", "Min": "0" },
"Compression": "SNAPPY", "Encodings": "PLAIN RLE BIT_PACKED ", "UncompressedSize": "28", "CompressedSize": "30" },
{"Id": "6", "Values": "4", "StatsSet": "True", "Stats": {"NumNulls": "0", "DistinctValues": "0", "Max": "Physics", "Min": "Astronomy" },
"Compression": "SNAPPY", "Encodings": "RLE PLAIN_DICTIONARY BIT_PACKED ", "UncompressedSize": "103", "CompressedSize": "107" }
]
}
]
}
(1 row)