Converting JSON to an SQL table
Analogous to this example: https://github.com/zolekode/json-to-tables/blob/master/example.py
Use the following script:
The script below exports the data as HTML, but you can just as well export it as SQL by calling:
table_maker.save_tables(YOUR_PATH, export_as="sql", sql_connection=YOUR_CONNECTION)
# See the code below
import json
from extent_table import ExtentTable
from table_maker import TableMaker
# Sample input: two volume records.
# - The first record carries one attachment and five tags.
# - The second record has an empty "Attachments" list, no "Tags" key at all,
#   and an extra "Iops" field.
# Note that "DeleteOnTermination" and the second "SnapshotId" are the *strings*
# "true" and "null", not a boolean / None.
Volumes = [
    {
        "AvailabilityZone": "us-east-1a",
        "Attachments": [
            {
                "AttachTime": "2013-12-18T22:35:00.000Z",
                "InstanceId": "i-1234567890abcdef0",
                "VolumeId": "vol-049df61146c4d7901",
                "State": "attached",
                "DeleteOnTermination": "true",
                "Device": "/dev/sda1",
            },
        ],
        "Tags": [
            {"Value": "DBJanitor-Private", "Key": "Name"},
            {"Value": "DBJanitor", "Key": "Owner"},
            {"Value": "Database", "Key": "Product"},
            {"Value": "DB Janitor", "Key": "Portfolio"},
            {"Value": "DB Service", "Key": "Service"},
        ],
        "VolumeType": "standard",
        "VolumeId": "vol-049df61146c4d7901",
        "State": "in-use",
        "SnapshotId": "snap-1234567890abcdef0",
        "CreateTime": "2013-12-18T22:35:00.084Z",
        "Size": 8,
    },
    {
        "AvailabilityZone": "us-east-1a",
        "Attachments": [],
        "VolumeType": "io1",
        "VolumeId": "vol-1234567890abcdef0",
        "State": "available",
        "Iops": 1000,
        "SnapshotId": "null",
        "CreateTime": "2014-02-27T00:02:41.791Z",
        "Size": 100,
    },
]

# Serialize to JSON text and parse it straight back, so `volumes` holds the
# data exactly as it would look after being loaded from a JSON document.
volumes = json.loads(json.dumps(Volumes))

# Build the converter on top of a fresh ExtentTable and feed it the records.
# "volumes" is presumably the name given to the root table -- confirm against
# the json-to-tables documentation.
extent_table = ExtentTable()
table_maker = TableMaker(extent_table)
table_maker.convert_json_objects_to_tables(volumes, "volumes")

# The argument 8 is presumably a display limit (rows per table) -- TODO confirm.
table_maker.show_tables(8)

# Write the generated tables into the current directory as HTML.
# Per the author, export_as may also be "sql" or "csv"; for "sql" there is an
# additional parameter for passing the engine/connection.
table_maker.save_tables("./", export_as="html")
Output in HTML:
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>ID</th>
<th>AvailabilityZone</th>
<th>VolumeType</th>
<th>VolumeId</th>
<th>State</th>
<th>SnapshotId</th>
<th>CreateTime</th>
<th>Size</th>
<th>Iops</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>us-east-1a</td>
<td>standard</td>
<td>vol-049df61146c4d7901</td>
<td>in-use</td>
<td>snap-1234567890abcdef0</td>
<td>2013-12-18T22:35:00.084Z</td>
<td>8</td>
<td>None</td>
</tr>
<tr>
<td>1</td>
<td>us-east-1a</td>
<td>io1</td>
<td>vol-1234567890abcdef0</td>
<td>available</td>
<td>null</td>
<td>2014-02-27T00:02:41.791Z</td>
<td>100</td>
<td>1000</td>
</tr>
<tr>
<td>2</td>
<td>None</td>
<td>None</td>
<td>None</td>
<td>None</td>
<td>None</td>
<td>None</td>
<td>None</td>
<td>None</td>
</tr>
</tbody>
</table>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>ID</th>
<th>PARENT_ID</th>
<th>is_scalar</th>
<th>scalar</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>0</td>
<td>False</td>
<td>None</td>
</tr>
</tbody>
</table>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>ID</th>
<th>AttachTime</th>
<th>InstanceId</th>
<th>VolumeId</th>
<th>State</th>
<th>DeleteOnTermination</th>
<th>Device</th>
<th>PARENT_ID</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>2013-12-18T22:35:00.000Z</td>
<td>i-1234567890abcdef0</td>
<td>vol-049df61146c4d7901</td>
<td>attached</td>
<td>true</td>
<td>/dev/sda1</td>
<td>0</td>
</tr>
<tr>
<td>1</td>
<td>None</td>
<td>None</td>
<td>None</td>
<td>None</td>
<td>None</td>
<td>None</td>
<td>None</td>
</tr>
</tbody>
</table>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>ID</th>
<th>PARENT_ID</th>
<th>is_scalar</th>
<th>scalar</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>0</td>
<td>False</td>
<td>None</td>
</tr>
<tr>
<td>1</td>
<td>0</td>
<td>False</td>
<td>None</td>
</tr>
<tr>
<td>2</td>
<td>0</td>
<td>False</td>
<td>None</td>
</tr>
<tr>
<td>3</td>
<td>0</td>
<td>False</td>
<td>None</td>
</tr>
<tr>
<td>4</td>
<td>0</td>
<td>False</td>
<td>None</td>
</tr>
</tbody>
</table>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>ID</th>
<th>Value</th>
<th>Key</th>
<th>PARENT_ID</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>DBJanitor-Private</td>
<td>Name</td>
<td>0</td>
</tr>
<tr>
<td>1</td>
<td>DBJanitor</td>
<td>Owner</td>
<td>1</td>
</tr>
<tr>
<td>2</td>
<td>Database</td>
<td>Product</td>
<td>2</td>
</tr>
<tr>
<td>3</td>
<td>DB Janitor</td>
<td>Portfolio</td>
<td>3</td>
</tr>
<tr>
<td>4</td>
<td>DB Service</td>
<td>Service</td>
<td>4</td>
</tr>
<tr>
<td>5</td>
<td>None</td>
<td>None</td>
<td>None</td>
</tr>
</tbody>
</table>
I would do it this way:
# Load a JSON document with a top-level "Volumes" list and store it as three
# SQL tables: one for the volumes themselves and one each for their nested
# "Attachments" and "Tags" records.
#
# NOTE: `engine` is not defined in this snippet. It must already exist before
# the to_sql() calls run -- presumably an SQLAlchemy engine/connection
# (or a sqlite3 connection); confirm against your setup.
import json

import pandas as pd

fn = r'D:\temp\.data\40450591.json'
with open(fn, encoding='utf-8') as f:
    data = json.load(f)

# some of your records seem NOT to have `Tags` key, hence `KeyError: 'Tags'`
# let's fix it: give every record an (empty) `Tags` list when it is missing
for r in data['Volumes']:
    r.setdefault('Tags', [])

# Volume-level columns only; the nested lists get their own tables below.
# `columns=` replaces the positional axis argument (`drop([...], 1)`), which
# pandas 2.0 no longer accepts.
# NOTE(review): the sample output shown for `v` appears to have `VolumeId` as
# the index; append `.set_index('VolumeId')` here if that layout is wanted.
v = pd.DataFrame(data['Volumes']).drop(columns=['Attachments', 'Tags'])

# One row per nested record, tagged with the owning volume's id.
# meta_prefix='parent_' keeps that id apart from the attachment's own
# `VolumeId` field. `pd.json_normalize` is the supported spelling of the
# deprecated `pd.io.json.json_normalize`.
a = pd.json_normalize(data['Volumes'], 'Attachments', ['VolumeId'], meta_prefix='parent_')
t = pd.json_normalize(data['Volumes'], 'Tags', ['VolumeId'], meta_prefix='parent_')

# Persist each frame as its own table.
v.to_sql('volume', engine)
a.to_sql('attachment', engine)
t.to_sql('tag', engine)
Output:
In [179]: v
Out[179]:
AvailabilityZone CreateTime Iops Size SnapshotId State VolumeType
VolumeId
vol-049df61146c4d7901 us-east-1a 2013-12-18T22:35:00.084Z NaN 8 snap-1234567890abcdef0 in-use standard
vol-1234567890abcdef0 us-east-1a 2014-02-27T00:02:41.791Z 1000.0 100 None available io1
In [180]: a
Out[180]:
AttachTime DeleteOnTermination Device InstanceId State VolumeId parent_VolumeId
0 2013-12-18T22:35:00.000Z True /dev/sda1 i-1234567890abcdef0 attached vol-049df61146c4d7901 vol-049df61146c4d7901
1 2013-12-18T22:35:11.000Z True /dev/sda1 i-1234567890abcdef1 attached vol-049df61146c4d7111 vol-049df61146c4d7901
In [217]: t
Out[217]:
Key Value parent_VolumeId
0 Name DBJanitor-Private vol-049df61146c4d7901
1 Owner DBJanitor vol-049df61146c4d7901
2 Product Database vol-049df61146c4d7901
3 Portfolio DB Janitor vol-049df61146c4d7901
4 Service DB Service vol-049df61146c4d7901
Test JSON file:
{
"Volumes": [
{
"AvailabilityZone": "us-east-1a",
"Attachments": [
{
"AttachTime": "2013-12-18T22:35:00.000Z",
"InstanceId": "i-1234567890abcdef0",
"VolumeId": "vol-049df61146c4d7901",
"State": "attached",
"DeleteOnTermination": true,
"Device": "/dev/sda1"
},
{
"AttachTime": "2013-12-18T22:35:11.000Z",
"InstanceId": "i-1234567890abcdef1",
"VolumeId": "vol-049df61146c4d7111",
"State": "attached",
"DeleteOnTermination": true,
"Device": "/dev/sda1"
}
],
"Tags": [
{
"Value": "DBJanitor-Private",
"Key": "Name"
},
{
"Value": "DBJanitor",
"Key": "Owner"
},
{
"Value": "Database",
"Key": "Product"
},
{
"Value": "DB Janitor",
"Key": "Portfolio"
},
{
"Value": "DB Service",
"Key": "Service"
}
],
"VolumeType": "standard",
"VolumeId": "vol-049df61146c4d7901",
"State": "in-use",
"SnapshotId": "snap-1234567890abcdef0",
"CreateTime": "2013-12-18T22:35:00.084Z",
"Size": 8
},
{
"AvailabilityZone": "us-east-1a",
"Attachments": [],
"VolumeType": "io1",
"VolumeId": "vol-1234567890abcdef0",
"State": "available",
"Iops": 1000,
"SnapshotId": null,
"CreateTime": "2014-02-27T00:02:41.791Z",
"Size": 100
}
]
}