Python JSON
JSON (JavaScript Object Notation) is a lightweight data interchange format. Learn how to work with JSON in Python using the built-in json module for encoding, decoding, and manipulating JSON data.
What is JSON?
JSON (JavaScript Object Notation) is a lightweight, text-based data interchange format that's easy for humans to read and write, and easy for machines to parse and generate. It's widely used for data exchange between applications.
"JSON is the universal language of data exchange on the web."
Basic JSON Operations
Python's json module provides functions for encoding and decoding JSON:
import json
# Sample record built only from JSON-compatible Python types
data = {
    "name": "John Doe",
    "age": 30,
    "city": "New York",
    "skills": ["Python", "JavaScript", "SQL"],
    "is_active": True,
}

# Serialize: json.dumps ("dump string") returns the JSON text as a str
json_string = json.dumps(data)
print("JSON string:", json_string, sep="\n")
print("Type:", type(json_string))

# Deserialize: json.loads ("load string") parses JSON text back into Python objects
python_data = json.loads(json_string)
print("\nBack to Python:", python_data, sep="\n")
print("Type:", type(python_data))
print("Name:", python_data["name"])
Working with Files
Read and write JSON to/from files:
import json
# Data to save
user_data = {
    "users": [
        {"id": 1, "name": "Alice", "email": "alice@example.com"},
        {"id": 2, "name": "Bob", "email": "bob@example.com"},
        {"id": 3, "name": "Charlie", "email": "charlie@example.com"},
    ],
    "total_users": 3,
    "last_updated": "2023-12-25",
}

# Write to file (dump = dump to file).
# The encoding is given explicitly so the file does not depend on the
# platform's locale default.
with open("users.json", "w", encoding="utf-8") as file:
    json.dump(user_data, file, indent=4)
print("Data written to users.json")

# Read from file (load = load from file), using the same encoding it was written with
with open("users.json", "r", encoding="utf-8") as file:
    loaded_data = json.load(file)
print("Data loaded from file:")
print(json.dumps(loaded_data, indent=2))
JSON Formatting Options
Control the appearance of JSON output:
import json
data = {
    "employees": [
        {"name": "John", "department": "Engineering", "salary": 75000},
        {"name": "Jane", "department": "Marketing", "salary": 65000},
        {"name": "Bob", "department": "Sales", "salary": 55000},
    ]
}

# Default output: a single line using the separators ", " and ": "
compact = json.dumps(data)
print("Compact JSON:", compact, "", sep="\n")

# Pretty-printed output: one item per line, nested levels indented by 4 spaces
pretty = json.dumps(data, indent=4)
print("Pretty JSON:", pretty, "", sep="\n")

# Keys of every object emitted in sorted order
sorted_json = json.dumps(data, indent=2, sort_keys=True)
print("Sorted keys:", sorted_json, "", sep="\n")

# Custom (item, key) separators; dropping the spaces gives the most compact form
custom_sep = json.dumps(data, separators=(",", ":"))
print("Custom separators:", custom_sep, sep="\n")
Handling Different Data Types
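JSON supports only a fixed set of data types (string, number, boolean, null, array, and object); other Python objects need custom handling: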
import json
from datetime import datetime
# One value of each kind that maps directly onto a JSON type
json_data = {
    "string": "Hello World",
    "number": 42,
    "float": 3.14,
    "boolean": True,  # serialized as true
    "null": None,  # serialized as null
    "array": [1, 2, 3, "four"],
    "object": {"key": "value"},
}

json_str = json.dumps(json_data, indent=2)
print("JSON-compatible data:", json_str, sep="\n")
# Non-JSON types need custom handling
class Person:
    """Simple record holding a ``name`` and an ``age``."""

    def __init__(self, name, age):
        self.name, self.age = name, age
person = Person("Alice", 30)

# Person is not one of the types json can encode, so dumps raises TypeError
try:
    json.dumps(person)
except TypeError as e:
    print(f"\nError: {e}")
# Custom serialization
def person_to_dict(obj):
    """Convert a Person into a JSON-serializable dict.

    Intended to be passed as ``default=`` to ``json.dumps``.

    Args:
        obj: The object json could not serialize on its own.

    Returns:
        dict: ``name`` and ``age`` plus a ``"__class__": "Person"`` tag that a
        decoder can use to recognise the object.

    Raises:
        TypeError: If ``obj`` is not a Person.
    """
    if isinstance(obj, Person):
        return {"name": obj.name, "age": obj.age, "__class__": "Person"}
    # Report the bare type name (e.g. "set"), not the class repr ("<class 'set'>")
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")
# Using the default parameter: person_to_dict is called for objects json cannot encode
json_str = json.dumps(person, default=person_to_dict, indent=2)
print("\nCustom serialization:", json_str, sep="\n")
# Custom deserialization
def dict_to_person(dct):
    """Rebuild a Person from a dict tagged with ``"__class__": "Person"``.

    Any dict without that tag is returned unchanged.
    """
    if dct.get("__class__") != "Person":
        return dct
    return Person(dct["name"], dct["age"])
# Using object_hook: json.loads passes each decoded JSON object (dict) to
# dict_to_person, so the tagged dict comes back as a Person instance
parsed = json.loads(json_str, object_hook=dict_to_person)
print(f"\nDeserialized: {parsed.name}, {parsed.age}")
Custom JSON Encoder/Decoder
Create custom encoders and decoders:
import json
from datetime import datetime, date
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that wraps datetime, date and set values in tagged dicts."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        datetime -> ``{"__datetime__": <ISO string>}``,
        date -> ``{"__date__": <ISO string>}``,
        set -> ``{"__set__": <list of members>}``.
        Every other type is handed to the base class implementation.
        """
        # datetime must be tested before date: datetime is a subclass of date
        if isinstance(obj, datetime):
            return {"__datetime__": obj.isoformat()}
        if isinstance(obj, date):
            return {"__date__": obj.isoformat()}
        if isinstance(obj, set):
            return {"__set__": list(obj)}
        return super().default(obj)
class CustomDecoder(json.JSONDecoder):
    """JSON decoder that turns tagged dicts back into datetime, date and set values."""

    def __init__(self, *args, **kwargs):
        # Install this class's own hook so every decoded object passes through it
        super().__init__(*args, object_hook=self.object_hook, **kwargs)

    def object_hook(self, dct):
        """Convert a tagged dict to its Python value; return other dicts unchanged."""
        # Checked in this order; the first tag found in the dict decides the result
        converters = (
            ("__datetime__", datetime.fromisoformat),
            ("__date__", date.fromisoformat),
            ("__set__", set),
        )
        for tag, rebuild in converters:
            if tag in dct:
                return rebuild(dct[tag])
        return dct
# Test custom encoder/decoder with one value of each specially handled type
data = {
    "name": "Project Alpha",
    "created": datetime.now(),
    "deadline": date(2024, 12, 31),
    "tags": {"python", "web", "api"},
}

# Encode: unsupported values are routed through CustomEncoder.default
encoded = json.dumps(data, cls=CustomEncoder, indent=2)
print("Encoded:", encoded, sep="\n")

# Decode: tagged dicts are converted back by CustomDecoder.object_hook
decoded = json.loads(encoded, cls=CustomDecoder)
print("\nDecoded:")
print(f"Name: {decoded['name']}")
print(f"Created: {decoded['created']}")
print(f"Deadline: {decoded['deadline']}")
print(f"Tags: {decoded['tags']}")
JSON Schema Validation
Validate JSON data against a schema using the third-party jsonschema package (install it with pip install jsonschema):
import json
import jsonschema
# Define schema: a user is an object; "name" and "email" are mandatory
user_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string", "minLength": 1},
        "age": {"type": "integer", "minimum": 0, "maximum": 150},
        # NOTE: jsonschema only enforces "format" when a format checker is supplied
        "email": {"type": "string", "format": "email"},
        "active": {"type": "boolean"},
    },
    "required": ["name", "email"],
}

# Test data: satisfies every constraint in the schema
valid_user = {
    "name": "John Doe",
    "age": 30,
    "email": "john@example.com",
    "active": True,
}

# Test data: empty name, negative age, and an email without "@"
invalid_user = {
    "name": "",
    "age": -5,
    "email": "not-an-email",
}
def validate_user(user_data):
    """Validate ``user_data`` against ``user_schema``.

    Args:
        user_data: The decoded JSON value to check.

    Returns:
        tuple: ``(True, "Valid")`` when the data conforms; otherwise
        ``(False, message)`` where ``message`` describes the validation
        failure, or is prefixed with ``"Schema error: "`` when the schema
        itself is invalid.
    """
    try:
        # "format" keywords such as "email" are not enforced by default; a
        # format checker must be passed for them to be validated at all.
        jsonschema.validate(
            user_data,
            user_schema,
            format_checker=jsonschema.FormatChecker(),
        )
    except jsonschema.ValidationError as e:
        return False, str(e)
    except jsonschema.SchemaError as e:
        return False, f"Schema error: {e}"
    return True, "Valid"
# Run both sample users through the validator and show the (ok, message) result
for label, candidate in (("Valid user:", valid_user), ("Invalid user:", invalid_user)):
    print(label, validate_user(candidate))
Practical Examples
import json
import requests
from datetime import datetime
# Configuration management
def load_config(filename="config.json"):
    """Load configuration from a JSON file.

    Args:
        filename: Path of the JSON configuration file.

    Returns:
        The decoded file content, or a built-in default configuration
        (``database``, ``port``, ``debug``) when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    try:
        # JSON files are UTF-8 by convention; do not rely on the locale default
        with open(filename, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return {"database": "localhost", "port": 5432, "debug": False}
def save_config(config, filename="config.json"):
    """Write ``config`` to ``filename`` as JSON indented by 2 spaces.

    Args:
        config: JSON-serializable configuration value.
        filename: Destination path; an existing file is overwritten.
    """
    # Explicit UTF-8 so the written file does not depend on the locale default
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(config, f, indent=2)
# API data handling
def fetch_user_data(user_id):
    """Return a canned user record for ``user_id``.

    No request is made: the function simulates an API response with a fixed
    name and two fixed posts; only the ``id`` field reflects the argument.
    """
    posts = [
        {"id": 1, "title": "First Post", "content": "Hello World"},
        {"id": 2, "title": "Second Post", "content": "More content"},
    ]
    return {"id": user_id, "name": "John Doe", "posts": posts}
def save_user_data(user_data, filename):
    """Write ``user_data`` to ``filename`` as JSON indented by 2 spaces.

    Args:
        user_data: The value to serialize.
        filename: Destination path; an existing file is overwritten.

    Note:
        ``default=str`` means any value json cannot encode natively (for
        example a datetime) is written as ``str(value)`` instead of raising.
    """
    # Explicit UTF-8 so the written file does not depend on the locale default
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(user_data, f, indent=2, default=str)
# Data transformation
def flatten_json(nested_json, prefix=""):
    """Flatten a nested JSON object into a single-level dictionary.

    Nested object keys are joined with ``.`` and list positions are written
    as ``[index]``, e.g. ``{"a": {"b": [1, {"c": 2}]}}`` becomes
    ``{"a.b[0]": 1, "a.b[1].c": 2}``.

    Lists nested directly inside lists are flattened too
    (``{"m": [[1]]}`` -> ``{"m[0][0]": 1}``), and empty dicts or lists are
    kept as values under their own key instead of being dropped.

    Args:
        nested_json: The dict to flatten.
        prefix: Key path prepended to every produced key (used for recursion).

    Returns:
        dict: Flat mapping from key path to leaf value.
    """
    flattened = {}

    def _flatten_value(value, path):
        # Non-empty containers are expanded; everything else is a leaf
        if isinstance(value, dict) and value:
            flattened.update(flatten_json(value, path))
        elif isinstance(value, list) and value:
            for index, item in enumerate(value):
                _flatten_value(item, f"{path}[{index}]")
        else:
            flattened[path] = value

    for key, value in nested_json.items():
        _flatten_value(value, f"{prefix}.{key}" if prefix else key)
    return flattened
# Pretty print (indented, sorted keys)
def pretty_print_json(data):
    """Print ``data`` as JSON with 2-space indentation and sorted keys.

    The output is plain text; no coloring is applied.
    """
    rendered = json.dumps(data, indent=2, sort_keys=True)
    print(rendered)
# Merge JSON objects
def merge_json(*json_objects):
    """Combine any number of JSON objects into one new dict.

    Objects are applied left to right, so for a key present in several
    objects the value from the last one wins. The merge is shallow: nested
    values are replaced as a whole, not merged. The inputs are not modified.
    """
    merged = {}
    for source in json_objects:
        merged.update(source)
    return merged
# Filter JSON data
def filter_by_key(data, keys_to_keep):
    """Return ``data`` reduced to the keys listed in ``keys_to_keep``.

    A dict yields a new dict holding only the wanted top-level keys (values
    are kept as they are). A list is processed element by element. Any other
    value is returned unchanged.
    """
    if isinstance(data, list):
        return [filter_by_key(entry, keys_to_keep) for entry in data]
    if not isinstance(data, dict):
        return data
    return {name: val for name, val in data.items() if name in keys_to_keep}
# Usage examples: nested objects plus a list of objects
nested_data = {
    "user": {
        "name": "Alice",
        "profile": {
            "age": 30,
            "city": "NYC",
        },
    },
    "posts": [
        {"title": "Post 1", "likes": 10},
        {"title": "Post 2", "likes": 5},
    ],
}

flattened = flatten_json(nested_data)
print("Flattened:")
for key, value in flattened.items():
    print(f" {key}: {value}")

# Filter example: drop the password before displaying the record
user_data = {"name": "John", "age": 30, "email": "john@example.com", "password": "secret"}
filtered = filter_by_key(user_data, ["name", "age", "email"])
print("\nFiltered user data:", json.dumps(filtered, indent=2), sep="\n")
Best Practices
- Use context managers for file operations: with open() as f
- Handle JSON errors gracefully: try/except blocks
- Use indent for readability: During development
- Validate JSON data: Before processing
- Use sort_keys for consistency: In configuration files
- Document custom encoders/decoders: Complex transformations
- Consider security implications: When parsing untrusted JSON
- Use appropriate data types: JSON has limited type support
- Handle encoding issues: Specify encoding for files
- Test JSON operations: With various data structures
Common Issues and Solutions
- Datetime serialization: Use custom encoder or convert to string
- Set objects: Convert to list for JSON compatibility
- Circular references: Avoid or use custom serialization
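- Large JSON files: json.load reads the whole document into memory; for big data use a streaming parser (such as the third-party ijson package) or a line-delimited format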
- Unicode issues: Use ensure_ascii=False for non-ASCII characters
- Precision loss: Be aware of float precision limitations
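- Key order: The JSON specification treats objects as unordered; Python's json module preserves dict insertion order, so use sort_keys=True when you need deterministic output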
JSON is the standard for data interchange in modern web applications. Python's json module makes it easy to work with JSON data, but understanding its limitations and best practices is crucial for robust applications.