Link Search Menu Expand Document

JSON Files

Working With JSON Data in Python

from pprint import pprint
# scores dictionary
games = {
    'games':[
        {
            'date': '2019-05-01', 
            'home_team': 'Pirates', 
            'home_score': 0, 
            'away_team': 'Cubs', 
            'away_score': 10
        },
        {
            'date': '2019-05-15', 
            'home_team': 'Reds', 
            'home_score': 7, 
            'away_team': 'Pirates', 
            'away_score': 0
        },
        {
            'date': '2019-05-17', 
            'home_team': 'Reds', 
            'home_score': 12, 
            'away_team': 'Pirates', 
            'away_score': 0
        },
        {
            'date': '2019-05-18', 
            'home_team': 'Reds', 
            'home_score': 8, 
            'away_team': 'Pirates', 
            'away_score': 0
        },
        {
            'date': '2019-05-21', 
            'home_team': 'Pirates', 
            'home_score': 1, 
            'away_team': 'Brewers', 
            'away_score': 11
        }
    ]
}
pprint(games)
{'games': [{'away_score': 10,
            'away_team': 'Cubs',
            'date': '2019-05-01',
            'home_score': 0,
            'home_team': 'Pirates'},
           {'away_score': 0,
            'away_team': 'Pirates',
            'date': '2019-05-15',
            'home_score': 7,
            'home_team': 'Reds'},
           {'away_score': 0,
            'away_team': 'Pirates',
            'date': '2019-05-17',
            'home_score': 12,
            'home_team': 'Reds'},
           {'away_score': 0,
            'away_team': 'Pirates',
            'date': '2019-05-18',
            'home_score': 8,
            'home_team': 'Reds'},
           {'away_score': 11,
            'away_team': 'Brewers',
            'date': '2019-05-21',
            'home_score': 1,
            'home_team': 'Pirates'}]}

Python Serialization

In this instance, serialization is the process of converting the python objects to JSON. Python object types get serialized to the following JSON types.

Python JSON
dict object
list, tuple array
str string
int, long, float number
True true
False false
None null
import json
pprint(games)
print()

games_json = json.dumps(games)
pprint(games_json)
{'games': [{'away_score': 10,
            'away_team': 'Cubs',
            'date': '2019-05-01',
            'home_score': 0,
            'home_team': 'Pirates'},
           {'away_score': 0,
            'away_team': 'Pirates',
            'date': '2019-05-15',
            'home_score': 7,
            'home_team': 'Reds'},
           {'away_score': 0,
            'away_team': 'Pirates',
            'date': '2019-05-17',
            'home_score': 12,
            'home_team': 'Reds'},
           {'away_score': 0,
            'away_team': 'Pirates',
            'date': '2019-05-18',
            'home_score': 8,
            'home_team': 'Reds'},
           {'away_score': 11,
            'away_team': 'Brewers',
            'date': '2019-05-21',
            'home_score': 1,
            'home_team': 'Pirates'}]}

('{"games": [{"date": "2019-05-01", "home_team": "Pirates", "home_score": 0, '
 '"away_team": "Cubs", "away_score": 10}, {"date": "2019-05-15", "home_team": '
 '"Reds", "home_score": 7, "away_team": "Pirates", "away_score": 0}, {"date": '
 '"2019-05-17", "home_team": "Reds", "home_score": 12, "away_team": "Pirates", '
 '"away_score": 0}, {"date": "2019-05-18", "home_team": "Reds", "home_score": '
 '8, "away_team": "Pirates", "away_score": 0}, {"date": "2019-05-21", '
 '"home_team": "Pirates", "home_score": 1, "away_team": "Brewers", '
 '"away_score": 11}]}')
# changing whitespace
games_json = json.dumps(games, indent=4)
print(games_json)
{
    "games": [
        {
            "date": "2019-05-01",
            "home_team": "Pirates",
            "home_score": 0,
            "away_team": "Cubs",
            "away_score": 10
        },
        {
            "date": "2019-05-15",
            "home_team": "Reds",
            "home_score": 7,
            "away_team": "Pirates",
            "away_score": 0
        },
        {
            "date": "2019-05-17",
            "home_team": "Reds",
            "home_score": 12,
            "away_team": "Pirates",
            "away_score": 0
        },
        {
            "date": "2019-05-18",
            "home_team": "Reds",
            "home_score": 8,
            "away_team": "Pirates",
            "away_score": 0
        },
        {
            "date": "2019-05-21",
            "home_team": "Pirates",
            "home_score": 1,
            "away_team": "Brewers",
            "away_score": 11
        }
    ]
}
# compacting
games_json = json.dumps(games, separators=(',', ':'))
print(games_json)
{"games":[{"date":"2019-05-01","home_team":"Pirates","home_score":0,"away_team":"Cubs","away_score":10},{"date":"2019-05-15","home_team":"Reds","home_score":7,"away_team":"Pirates","away_score":0},{"date":"2019-05-17","home_team":"Reds","home_score":12,"away_team":"Pirates","away_score":0},{"date":"2019-05-18","home_team":"Reds","home_score":8,"away_team":"Pirates","away_score":0},{"date":"2019-05-21","home_team":"Pirates","home_score":1,"away_team":"Brewers","away_score":11}]}

Writing JSON files

with open("games.json", "w") as f:
    json.dump(games, f)

Reading JSON files

# open and print line by line
with open('games.json', "r") as f:
    games = json.load(f)
    print(games)
{'games': [{'date': '2019-05-01', 'home_team': 'Pirates', 'home_score': 0, 'away_team': 'Cubs', 'away_score': 10}, {'date': '2019-05-15', 'home_team': 'Reds', 'home_score': 7, 'away_team': 'Pirates', 'away_score': 0}, {'date': '2019-05-17', 'home_team': 'Reds', 'home_score': 12, 'away_team': 'Pirates', 'away_score': 0}, {'date': '2019-05-18', 'home_team': 'Reds', 'home_score': 8, 'away_team': 'Pirates', 'away_score': 0}, {'date': '2019-05-21', 'home_team': 'Pirates', 'home_score': 1, 'away_team': 'Brewers', 'away_score': 11}]}

Using JSONPath

Source

One of the biggest strengths of XML is XPath, the query-oriented language to query subsections of an XML document. In the same line, JSONPath is a query language with features similar to XPath that lets you extract just the bits of a JSON document your application needs.

JSONPath Syntax

As XPath, JSONPath also has syntax to follow:

  • $ – symbol refers to the root object or element.
  • @ – symbol refers to the current object or element.
  • . – operator is the dot-child operator, which you use to denote a child element of the current element.
  • [ ] – is the subscript operator, which you use to denote a child element of the current element (by name or index).
  • * – operator is a wildcard, returning all objects or elements regardless of their names.
  • , – operator is the union operator, which returns the union of the children or indexes indicated.
  • : – operator is the array slice operator, so you can slice collections using the syntax [start:end:step] to return a subcollection of a collection.
  • ( ) – operator lets you pass a script expression in the underlying implementation’s script language. It’s not supported by every implementation of JSONPath, however.
  • ? ( ) – to query all items that meet a certain criteria.

There are many online jsonpath validators out there, and I encourage you to try a few.

Using JSONPath in Python

To use JSONPath, you will need to include its dependency and then use it.

The library we’ll use is jsonpath-rw-ext

pip install jsonpath-rw-ext

Once installed we’re ready to use it.

Finding Information about Pirate Games

Lets try to parse a JSON file a few different ways. First, we need to load the data as a JSON object into memory.

with open('games.json', "r") as f:
    games = json.load(f)
    pprint(games)
{'games': [{'away_score': 10,
            'away_team': 'Cubs',
            'date': '2019-05-01',
            'home_score': 0,
            'home_team': 'Pirates'},
           {'away_score': 0,
            'away_team': 'Pirates',
            'date': '2019-05-15',
            'home_score': 7,
            'home_team': 'Reds'},
           {'away_score': 0,
            'away_team': 'Pirates',
            'date': '2019-05-17',
            'home_score': 12,
            'home_team': 'Reds'},
           {'away_score': 0,
            'away_team': 'Pirates',
            'date': '2019-05-18',
            'home_score': 8,
            'home_team': 'Reds'},
           {'away_score': 11,
            'away_team': 'Brewers',
            'date': '2019-05-21',
            'home_score': 1,
            'home_team': 'Pirates'}]}

Now that the data is loaded, we can try to answer a few questions.

What dates were the games played on?

# json parsing
game_dates = []
for game in games['games']:
    game_date = game['date']
    game_dates.append(game_date)
print(game_dates)
['2019-05-01', '2019-05-15', '2019-05-17', '2019-05-18', '2019-05-21']
# jsonpath parsing
# import the library
import jsonpath_rw_ext as jp
# create a jsonpath expression and match
game_dates = jp.match("$.games[*].date", games)
print(game_dates)
['2019-05-01', '2019-05-15', '2019-05-17', '2019-05-18', '2019-05-21']
# this also works
game_dates = jp.match("$.games..date", games)
print(game_dates)
['2019-05-01', '2019-05-15', '2019-05-17', '2019-05-18', '2019-05-21']

What were the scores for the Pirates home games?

# json parsing
home_scores = []
for game in games['games']:
    if game['home_team'] == 'Pirates':
        home_scores.append(game)
        
pprint(home_scores)
[{'away_score': 10,
  'away_team': 'Cubs',
  'date': '2019-05-01',
  'home_score': 0,
  'home_team': 'Pirates'},
 {'away_score': 11,
  'away_team': 'Brewers',
  'date': '2019-05-21',
  'home_score': 1,
  'home_team': 'Pirates'}]
# jsonpath parsing
home_scores = jp.match("$.games[?(@.home_team=='Pirates')]", games)
pprint(home_scores)
[{'away_score': 10,
  'away_team': 'Cubs',
  'date': '2019-05-01',
  'home_score': 0,
  'home_team': 'Pirates'},
 {'away_score': 11,
  'away_team': 'Brewers',
  'date': '2019-05-21',
  'home_score': 1,
  'home_team': 'Pirates'}]

What were the scores after May 15th

# json parsing
game_scores = []
for game in games['games']:
    if game['date'] > '2019-05-15':
        game_scores.append(game)
        
pprint(game_scores)
[{'away_score': 0,
  'away_team': 'Pirates',
  'date': '2019-05-17',
  'home_score': 12,
  'home_team': 'Reds'},
 {'away_score': 0,
  'away_team': 'Pirates',
  'date': '2019-05-18',
  'home_score': 8,
  'home_team': 'Reds'},
 {'away_score': 11,
  'away_team': 'Brewers',
  'date': '2019-05-21',
  'home_score': 1,
  'home_team': 'Pirates'}]
# jsonpath parsing
game_scores = jp.match("$.games[?(@.date > '2019-05-15')]", games)
pprint(game_scores)
[{'away_score': 0,
  'away_team': 'Pirates',
  'date': '2019-05-17',
  'home_score': 12,
  'home_team': 'Reds'},
 {'away_score': 0,
  'away_team': 'Pirates',
  'date': '2019-05-18',
  'home_score': 8,
  'home_team': 'Reds'},
 {'away_score': 11,
  'away_team': 'Brewers',
  'date': '2019-05-21',
  'home_score': 1,
  'home_team': 'Pirates'}]