GeoQuery / backend /data /catalog_schema.json
GerardCB's picture
Deploy to Spaces (Final Clean)
4851501
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "GeoQuery Data Catalog Entry",
"description": "Schema for dataset metadata in the GeoQuery platform catalog",
"type": "object",
"required": [
"path",
"columns",
"category",
"format"
],
"properties": {
"path": {
"type": "string",
"description": "Relative path to the data file from the data directory"
},
"description": {
"type": "string",
"description": "Auto-generated basic description (e.g., 'Data from hdx/health.geojson')"
},
"semantic_description": {
"type": [
"string",
"null"
],
"description": "LLM-generated rich description explaining the dataset's contents and use cases"
},
"tags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Searchable tags for categorization (e.g., ['health', 'facilities', 'infrastructure'])"
},
"data_type": {
"type": "string",
"enum": [
"static",
"semi-static",
"realtime"
],
"description": "How frequently the data changes",
"default": "static"
},
"update_frequency": {
"type": [
"string",
"null"
],
"enum": [
null,
"yearly",
"monthly",
"weekly",
"daily",
"hourly",
"realtime"
],
"description": "Expected update frequency for the dataset"
},
"columns": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of column names in the dataset"
},
"row_count": {
"type": [
"integer",
"null"
],
"description": "Number of features/rows in the dataset"
},
"category": {
"type": "string",
"description": "Source category (base, osm, hdx, inec, custom)"
},
"format": {
"type": "string",
"enum": [
"geojson",
"shapefile",
"geoparquet",
"csv"
],
"description": "File format of the source data"
},
"geometry_type": {
"type": [
"string",
"null"
],
"enum": [
null,
"Point",
"MultiPoint",
"LineString",
"MultiLineString",
"Polygon",
"MultiPolygon"
],
"description": "Type of geometries in the dataset"
},
"bbox": {
"type": [
"array",
"null"
],
"items": {
"type": "number"
},
"minItems": 4,
"maxItems": 4,
"description": "Bounding box [minLon, minLat, maxLon, maxLat]"
},
"source": {
"type": [
"string",
"null"
],
"description": "Original source of the data (e.g., 'OpenStreetMap', 'INEC Census 2023')"
},
"license": {
"type": [
"string",
"null"
],
"description": "Data license (e.g., 'ODbL', 'CC-BY-4.0', 'Public Domain')"
},
"last_indexed": {
"type": "string",
"format": "date-time",
"description": "ISO timestamp when the dataset was last indexed"
},
"last_enriched": {
"type": [
"string",
"null"
],
"format": "date-time",
"description": "ISO timestamp when LLM enrichment was last run"
}
}
}