X methods to manage Python configuration

Updates

Introduction

1. Config files (JSON / YAML / INI / *.cfg)

// config.json
{
"mysql":{
"host":"localhost",
"user":"root",
"passwd":"my secret password",
"db":"write-math"
},
"other":{
"preprocessing_queue":[
"preprocessing.scale_and_center",
"preprocessing.dot_reduction",
"preprocessing.connect_lines"
],
"use_anonymous":true
}
}
import json

# Parse config.json into plain Python objects (dicts, lists, strings, booleans).
# The explicit encoding keeps decoding independent of the platform's locale default.
with open('config.json', encoding='utf-8') as json_data_file:
    data = json.load(json_data_file)
print(data)
mysql:
  host: localhost
  user: root
  passwd: my secret password
  db: write-math
other:
  preprocessing_queue:
    - preprocessing.scale_and_center
    - preprocessing.dot_reduction
    - preprocessing.connect_lines
  use_anonymous: yes
import yaml

# safe_load builds only plain Python types (dict, list, str, ...). The bare
# yaml.load(stream) call requires an explicit Loader in current PyYAML and,
# with an unsafe loader, can construct arbitrary Python objects from the file.
with open("config.yml", 'r') as ymlfile:
    cfg = yaml.safe_load(ymlfile)
[mysql]
host=localhost
user=root
passwd=my secret password
db=write-math

[other]
preprocessing_queue=["preprocessing.scale_and_center",
                     "preprocessing.dot_reduction",
                     "preprocessing.connect_lines"]
use_anonymous=yes
#!/usr/bin/env python

# Python 3 port: the module is named configparser (lower case), readfp() was
# replaced by read_file(), and the file content is text, so it is wrapped in
# io.StringIO rather than io.BytesIO.
import configparser
import io

# Load the configuration file
with open("config.ini") as f:
    sample_config = f.read()
# allow_no_value=True accepts option lines that carry no "=value" part.
config = configparser.RawConfigParser(allow_no_value=True)
config.read_file(io.StringIO(sample_config))

# List all contents
print("List all contents")
for section in config.sections():
    print("Section: %s" % section)
    for options in config.options(section):
        print("x %s:::%s:::%s" % (options,
                                  config.get(section, options),
                                  str(type(options))))

# Print some contents
print("\nPrint some contents")
print(config.get('other', 'use_anonymous'))  # Just get the value (a string)
print(config.getboolean('other', 'use_anonymous'))  # Same option parsed as a bool

2. Database

3. Centralized Configuration service

4. Operating system’s environment variables

5. Python scripts

# config.py
from pathlib import Path
import os
import socket

# Absolute path of the directory that holds this module.
basedir = os.path.dirname(os.path.abspath(__file__))

# Version string exposed as a module attribute.
__version__ = "0.1.22"
class Config:
    """Base settings; the environment-specific classes subclass and override these."""

    # Host identity, looked up once while the class body executes.
    HOST_NAME = socket.getfqdn(socket.gethostname())
    HOST_IP = socket.gethostbyname(HOST_NAME)

    # S3 settings
    AWS_REGION_NAME = 'us-east-1'
    S3_BUCKET_NAME = 'default'
    S3_PREFIX = 'dfs'

    # Directories: data lives in a "data" folder under basedir; no model
    # directory is set at this level.
    DATA_ROOT = Path(basedir) / 'data'
    MODEL_ROOT = None

    # Debug setting
    DEBUG = True
class OnPremiseWorker(Config):
    """Settings read from environment variables, with fixed fallbacks.

    APP_DATA_ROOT and APP_MODEL_ROOT replace the default directories;
    APP_VERBOSE switches DEBUG on or off.
    """

    # The fallback has to be os.getenv's second argument. With the parenthesis
    # closed right after the variable name, an unset variable handed None to
    # Path() (TypeError), and a set one was discarded because an absolute path
    # given as a later Path() argument replaces everything before it.
    DATA_ROOT = Path(os.getenv('APP_DATA_ROOT', '/data/xxx/data'))
    MODEL_ROOT = Path(os.getenv('APP_MODEL_ROOT', '/data/xxx/models'))

    # DEBUG is a flag, not a filesystem path (Path(..., 0) always raised
    # TypeError). Unset, empty, "0", "false", "no" and "off" mean disabled;
    # any other value enables it.
    DEBUG = os.getenv('APP_VERBOSE', '0').strip().lower() not in (
        '', '0', 'false', 'no', 'off',
    )
class Debug(Config):
    """Configuration with DEBUG explicitly switched on."""

    DEBUG = True
# Maps an environment name to its configuration class; 'default' and 'debug'
# both resolve to Debug.
config_cls = dict(
    default=Debug,
    prem=OnPremiseWorker,
    debug=Debug,
)
import os

from config import config_cls

# The ENV environment variable picks the configuration class; when it is unset
# the 'default' entry is used.
config = config_cls[os.getenv('ENV', 'default')]
print(config.DEBUG)

6. Multilayer config

1. set as an environment variable
2. set as a command environment variable
3. set in airflow.cfg
4. command in airflow.cfg
5. Airflow’s built-in defaults

7. Hydra

db:
  driver: mysql
  user: omry
  pass: secret
import hydra
from omegaconf import DictConfig
@hydra.main(config_path="config.yaml")
def my_app(cfg: DictConfig) -> None:
    """Print the configuration object handed to the decorated function."""
    # NOTE(review): config_path="config.yaml" and cfg.pretty() look like the
    # early Hydra / OmegaConf API — confirm against the installed versions.
    print(cfg.pretty())


if __name__ == "__main__":
    my_app()

8. Gin config

9. dynaconf

Conclusion:

References:

Data scientist & MLE & SWE

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store