{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "GeoQuery Data Catalog Entry",
  "description": "Schema for dataset metadata in the GeoQuery platform catalog",
  "type": "object",
  "required": ["path", "columns", "category", "format"],
  "properties": {
    "path": {
      "type": "string",
      "description": "Relative path to the data file from the data directory"
    },
    "description": {
      "type": "string",
      "description": "Auto-generated basic description (e.g., 'Data from hdx/health.geojson')"
    },
    "semantic_description": {
      "type": ["string", "null"],
      "description": "LLM-generated rich description explaining the dataset's contents and use cases"
    },
    "tags": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "Searchable tags for categorization (e.g., ['health', 'facilities', 'infrastructure'])"
    },
    "data_type": {
      "type": "string",
      "enum": ["static", "semi-static", "realtime"],
      "description": "How frequently the data changes",
      "default": "static"
    },
    "update_frequency": {
      "type": ["string", "null"],
      "enum": [
        null,
        "yearly",
        "monthly",
        "weekly",
        "daily",
        "hourly",
        "realtime"
      ],
      "description": "Expected update frequency for the dataset"
    },
    "columns": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "List of column names in the dataset"
    },
    "row_count": {
      "type": ["integer", "null"],
      "description": "Number of features/rows in the dataset"
    },
    "category": {
      "type": "string",
      "description": "Source category (base, osm, hdx, inec, custom)"
    },
    "format": {
      "type": "string",
      "enum": ["geojson", "shapefile", "geoparquet", "csv"],
      "description": "File format of the source data"
    },
    "geometry_type": {
      "type": ["string", "null"],
      "enum": [
        null,
        "Point",
        "MultiPoint",
        "LineString",
        "MultiLineString",
        "Polygon",
        "MultiPolygon"
      ],
      "description": "Type of geometries in the dataset"
    },
    "bbox": {
      "type": ["array", "null"],
      "items": {
        "type": "number"
      },
      "minItems": 4,
      "maxItems": 4,
      "description": "Bounding box [minLon, minLat, maxLon, maxLat]"
    },
    "source": {
      "type": ["string", "null"],
      "description": "Original source of the data (e.g., 'OpenStreetMap', 'INEC Census 2023')"
    },
    "license": {
      "type": ["string", "null"],
      "description": "Data license (e.g., 'ODbL', 'CC-BY-4.0', 'Public Domain')"
    },
    "last_indexed": {
      "type": "string",
      "format": "date-time",
      "description": "ISO timestamp when the dataset was last indexed"
    },
    "last_enriched": {
      "type": ["string", "null"],
      "format": "date-time",
      "description": "ISO timestamp when LLM enrichment was last run"
    }
  }
}