filmov
tv
Importing JSON Data and Geodata (GeoJSON) in Exasol

Показать описание
create or replace table universities(university varchar(2000), geo geometry(4326));
select * from exa_spatial_ref_sys;
insert into universities values('OTH', 'POINT(12.095089 49.003674)');
select * from universities;
create or replace table geo_import(v varchar(2000000));
select * from geo_import;
-- ChatGPT: linux command to remove new lines from file, put result in new 1-line file
truncate table geo_import;
import into geo_import from
column separator = '0x01' column delimiter = '0x02'; -- dummy separaters / delimiters to import a whole line as one column value
select substr(v, 0, 1000) from geo_import;
-- JSON_EXTRACT: Examples
select json_extract('{"data":[{"a":5, "b":10}, {"a":7, "b":14}]}',
'$.data#.a', '$.data#.b')
emits (a int, b int);
select json_extract(v, '$.features#.properties.NAME_0',
'$.features#.properties.NAME_1',
'$.features#.properties.NAME_2',
'$.features#.properties.NAME_3'
-- ,'$.features#.geometry' -- does not work
)
emits (country varchar(2000000), area varchar(2000000),
district varchar(2000000), city varchar(2000000)
-- , geo varchar(2000000)
) from geo_import;
create or replace python3 scalar script CitiesFromGeoJSON(g varchar(2000000))
emits (country varchar(2000000), area varchar(2000000),
district varchar(2000000), city varchar(2000000),
geo varchar(2000000)) as
import json
def point(coordinates):
return str(coordinates[0])+" "+str(coordinates[1])
def linestring(coordinates):
return "("+ (",".join(map(point,coordinates))) +")"
def polygon(coordinates):
return "("+ ",".join(map(linestring,coordinates))+")"
def geometry(geo):
if geo["type"] == "Point":
return "POINT ("+point(geo["coordinates"])+")"
if geo["type"] == "MultiPoint":
return "MULTIPOINT(" + ",".join(map(point,geo["coordinates"]))+")"
if geo["type"] == "LineString":
return "LINESTRING" + linestring(geo["coordinates"])
if geo["type"] == "MultiLineString":
return "MULTILINESTRING("+ ",".join(map(linestring,geo["coordinates"]))+")"
if geo["type"] == "Polygon":
return "POLYGON" + polygon(geo["coordinates"])
if geo["type"] == "MultiPolygon":
return "MULTIPOLYGON("+ ",".join(map(polygon,geo["coordinates"]))+")"
if geo["type"] == "GeometryCollection":
return "GEOMETRYCOLLECTION(" + ",".join(map(geometry, geo["geometries"])) + ")"
def run(ctx):
for feature in json_data['features']:
n0 = feature['properties']['NAME_0']
n1 = feature['properties']['NAME_1']
n2 = feature['properties']['NAME_2']
n3 = feature['properties']['NAME_3']
geo = feature['geometry']
/;
select CitiesFromGeoJSON(v) from GEO_IMPORT;
create or replace table cities as
select country, area, district, city,
cast(geo as geometry(4326)) as geo
from (select CitiesFromGeoJSON(v) from GEO_IMPORT);
select * from cities limit 10;
select * from cities where district = 'Oberpfalz';
-- geo join
from universities u join cities c
-- Profiling shows that geo index on cities(geo) is used
select * from exa_spatial_ref_sys;
insert into universities values('OTH', 'POINT(12.095089 49.003674)');
select * from universities;
create or replace table geo_import(v varchar(2000000));
select * from geo_import;
-- ChatGPT: linux command to remove new lines from file, put result in new 1-line file
truncate table geo_import;
import into geo_import from
column separator = '0x01' column delimiter = '0x02'; -- dummy separaters / delimiters to import a whole line as one column value
select substr(v, 0, 1000) from geo_import;
-- JSON_EXTRACT: Examples
select json_extract('{"data":[{"a":5, "b":10}, {"a":7, "b":14}]}',
'$.data#.a', '$.data#.b')
emits (a int, b int);
select json_extract(v, '$.features#.properties.NAME_0',
'$.features#.properties.NAME_1',
'$.features#.properties.NAME_2',
'$.features#.properties.NAME_3'
-- ,'$.features#.geometry' -- does not work
)
emits (country varchar(2000000), area varchar(2000000),
district varchar(2000000), city varchar(2000000)
-- , geo varchar(2000000)
) from geo_import;
create or replace python3 scalar script CitiesFromGeoJSON(g varchar(2000000))
emits (country varchar(2000000), area varchar(2000000),
district varchar(2000000), city varchar(2000000),
geo varchar(2000000)) as
import json
def point(coordinates):
return str(coordinates[0])+" "+str(coordinates[1])
def linestring(coordinates):
return "("+ (",".join(map(point,coordinates))) +")"
def polygon(coordinates):
return "("+ ",".join(map(linestring,coordinates))+")"
def geometry(geo):
if geo["type"] == "Point":
return "POINT ("+point(geo["coordinates"])+")"
if geo["type"] == "MultiPoint":
return "MULTIPOINT(" + ",".join(map(point,geo["coordinates"]))+")"
if geo["type"] == "LineString":
return "LINESTRING" + linestring(geo["coordinates"])
if geo["type"] == "MultiLineString":
return "MULTILINESTRING("+ ",".join(map(linestring,geo["coordinates"]))+")"
if geo["type"] == "Polygon":
return "POLYGON" + polygon(geo["coordinates"])
if geo["type"] == "MultiPolygon":
return "MULTIPOLYGON("+ ",".join(map(polygon,geo["coordinates"]))+")"
if geo["type"] == "GeometryCollection":
return "GEOMETRYCOLLECTION(" + ",".join(map(geometry, geo["geometries"])) + ")"
def run(ctx):
for feature in json_data['features']:
n0 = feature['properties']['NAME_0']
n1 = feature['properties']['NAME_1']
n2 = feature['properties']['NAME_2']
n3 = feature['properties']['NAME_3']
geo = feature['geometry']
/;
select CitiesFromGeoJSON(v) from GEO_IMPORT;
create or replace table cities as
select country, area, district, city,
cast(geo as geometry(4326)) as geo
from (select CitiesFromGeoJSON(v) from GEO_IMPORT);
select * from cities limit 10;
select * from cities where district = 'Oberpfalz';
-- geo join
from universities u join cities c
-- Profiling shows that geo index on cities(geo) is used