File size: 4,184 Bytes
4851501 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "GeoQuery Data Catalog Entry",
"description": "Schema for dataset metadata in the GeoQuery platform catalog",
"type": "object",
"required": [
"path",
"columns",
"category",
"format"
],
"properties": {
"path": {
"type": "string",
"description": "Relative path to the data file from the data directory"
},
"description": {
"type": "string",
"description": "Auto-generated basic description (e.g., 'Data from hdx/health.geojson')"
},
"semantic_description": {
"type": [
"string",
"null"
],
"description": "LLM-generated rich description explaining the dataset's contents and use cases"
},
"tags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Searchable tags for categorization (e.g., ['health', 'facilities', 'infrastructure'])"
},
"data_type": {
"type": "string",
"enum": [
"static",
"semi-static",
"realtime"
],
"description": "How frequently the data changes",
"default": "static"
},
"update_frequency": {
"type": [
"string",
"null"
],
"enum": [
null,
"yearly",
"monthly",
"weekly",
"daily",
"hourly",
"realtime"
],
"description": "Expected update frequency for the dataset"
},
"columns": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of column names in the dataset"
},
"row_count": {
"type": [
"integer",
"null"
],
"description": "Number of features/rows in the dataset"
},
"category": {
"type": "string",
"description": "Source category (base, osm, hdx, inec, custom)"
},
"format": {
"type": "string",
"enum": [
"geojson",
"shapefile",
"geoparquet",
"csv"
],
"description": "File format of the source data"
},
"geometry_type": {
"type": [
"string",
"null"
],
"enum": [
null,
"Point",
"MultiPoint",
"LineString",
"MultiLineString",
"Polygon",
"MultiPolygon"
],
"description": "Type of geometries in the dataset"
},
"bbox": {
"type": [
"array",
"null"
],
"items": {
"type": "number"
},
"minItems": 4,
"maxItems": 4,
"description": "Bounding box [minLon, minLat, maxLon, maxLat]"
},
"source": {
"type": [
"string",
"null"
],
"description": "Original source of the data (e.g., 'OpenStreetMap', 'INEC Census 2023')"
},
"license": {
"type": [
"string",
"null"
],
"description": "Data license (e.g., 'ODbL', 'CC-BY-4.0', 'Public Domain')"
},
"last_indexed": {
"type": "string",
"format": "date-time",
"description": "ISO timestamp when the dataset was last indexed"
},
"last_enriched": {
"type": [
"string",
"null"
],
"format": "date-time",
"description": "ISO timestamp when LLM enrichment was last run"
}
}
}