Python CI/CD

Automate a machine learning project with Docker, a Makefile, and DevOps tools.

docker

development

Debuggable Docker Containers for Development

    # dev.Dockerfile
    # Stage "builder": base image with the compiler toolchain and an empty
    # virtual environment in /venv with an up-to-date pip.
    FROM python:3.8.1-buster AS builder
    RUN apt-get update && \
        apt-get install -y --no-install-recommends python3-venv gcc libpython3-dev && \
        python3 -m venv /venv && \
        /venv/bin/pip install --upgrade pip

    # Stage "builder-venv": install the project dependencies into /venv.
    # Only requirements.txt is copied here, so this layer is rebuilt only
    # when the dependency list changes.
    FROM builder AS builder-venv

    COPY requirements.txt /requirements.txt
    RUN /venv/bin/pip install -r /requirements.txt

    # Stage "tester": copy the sources and run the test suite; a failing
    # pytest run makes the RUN step (and therefore the build) fail.
    FROM builder-venv AS tester

    COPY . /app
    WORKDIR /app
    RUN /venv/bin/pytest

    # Stage "runner": final image; receives the tested virtual environment
    # and application from the "tester" stage.
    # NOTE(review): the base image presumably bundles debugging tools - confirm.
    FROM martinheinz/python-3.8.1-buster-tools:latest AS runner
    COPY --from=tester /venv /venv
    COPY --from=tester /app /app

    WORKDIR /app

    ENTRYPOINT ["/venv/bin/python3", "-m", "blueprint"]
    # Run as an unprivileged (non-root) user.
    USER 1001

    # {NAME} is a placeholder that the Makefile build targets replace via sed.
    LABEL name={NAME}
    LABEL version={VERSION}

production

Minimize the Docker image size for production.

    # prod.Dockerfile (excerpt: only the lines that differ from dev.Dockerfile)
    #  Line 1 - change the builder image
    FROM debian:buster-slim AS builder
    #  ...
    #  Line 17 - switch to a Distroless image
    FROM gcr.io/distroless/python3-debian10 AS runner
    #  ... rest of the Dockerfile

There is a debuggable version of gcr.io/distroless/python3-debian10, named gcr.io/distroless/python3-debian10:debug:

    #  Line 17 - switch to the Distroless :debug image
    FROM gcr.io/distroless/python3-debian10:debug AS runner
    #  ... rest of the Dockerfile

Makefile

build dev

Use make build-dev to run the following target:

    # Module name (just the basename). Used as the last component of the
    # image name and as the value substituted for the {NAME} label.
    MODULE := blueprint

    # Where to push the docker image. `?=` only assigns when REGISTRY is not
    # already set, so it can be overridden from the environment or command line.
    REGISTRY ?= docker.pkg.github.com/martinheinz/python-project-blueprint

    # Image repository name without a tag: <registry>/<module>.
    IMAGE := $(REGISTRY)/$(MODULE)

    # This version-strategy uses git tags to set the version string.
    # `:=` runs the shell command once, when the Makefile is parsed.
    TAG := $(shell git describe --tags --always --dirty)

    # Build the development image, tagged $(IMAGE):$(TAG).
    # sed fills the {NAME}/{VERSION} placeholders in dev.Dockerfile and pipes
    # the result to `docker build`, which reads the Dockerfile from stdin (-f-).
    # BLUE and NC are presumably colour variables defined elsewhere - confirm.
    # Recipe lines must start with a hard TAB; spaces cause "missing separator".
    .PHONY: build-dev
    build-dev:
    	@echo "\n${BLUE}Building Development image with labels:\n"
    	@echo "name: $(MODULE)"
    	@echo "version: $(TAG)${NC}\n"
    	@sed \
    	    -e 's|{NAME}|$(MODULE)|g' \
    	    -e 's|{VERSION}|$(TAG)|g' \
    	    dev.Dockerfile | docker build -t $(IMAGE):$(TAG) -f- .

build prod

Use make build-prod VERSION=1.0.0 to run the following target:

    # Build the production image, tagged $(IMAGE):$(VERSION).
    # sed fills the {NAME}/{VERSION} placeholders in prod.Dockerfile and pipes
    # the result to `docker build`, which reads the Dockerfile from stdin (-f-).
    # BLUE and NC are presumably colour variables defined elsewhere - confirm.
    # Recipe lines must start with a hard TAB; spaces cause "missing separator".
    .PHONY: build-prod
    build-prod:
    	$(if $(strip $(VERSION)),,$(error VERSION is not set - pass VERSION=x.y.z on the make command line))
    	@echo "\n${BLUE}Building Production image with labels:\n"
    	@echo "name: $(MODULE)"
    	@echo "version: $(VERSION)${NC}\n"
    	@sed \
    	    -e 's|{NAME}|$(MODULE)|g' \
    	    -e 's|{VERSION}|$(VERSION)|g' \
    	    prod.Dockerfile | docker build -t $(IMAGE):$(VERSION) -f- .

build containerized env

The entrypoint is overridden by bash, and the container command is overridden by the CMD argument. This way we can either just enter the container and poke around, or run a one-off command.

    # Example: make shell CMD="-c 'date > datefile'"
    # Builds the development image first, then starts a throw-away (--rm),
    # interactive (-ti) container from it with /bin/bash as the entrypoint.
    # The container runs with the invoking user's uid:gid; `$$` passes a
    # literal `$` to the shell so that `id` is evaluated there, not by make.
    # $(CMD) is appended as arguments to bash and may be left empty.
    # Recipe lines must start with a hard TAB; spaces cause "missing separator".
    .PHONY: shell
    shell: build-dev
    	@echo "\n${BLUE}Launching a shell in the containerized build environment...${NC}\n"
    	@docker run \
    	    -ti \
    	    --rm \
    	    --entrypoint /bin/bash \
    	    -u $$(id -u):$$(id -g) \
    	    $(IMAGE):$(TAG) \
    	    $(CMD)

build push

Use make push VERSION=0.0.2 to push the Docker image to the registry:

    # Where to push the docker image; overridable from the environment or
    # the make command line.
    REGISTRY ?= docker.pkg.github.com/martinheinz/python-project-blueprint

    # Build the production image (prerequisite build-prod), then push
    # $(IMAGE):$(VERSION) to the registry.
    # Recipe lines must start with a hard TAB; spaces cause "missing separator".
    .PHONY: push
    push: build-prod
    	@echo "\n${BLUE}Pushing image to GitHub Docker Registry...${NC}\n"
    	@docker push $(IMAGE):$(VERSION)

build clean

Clean up Docker artifacts:

    # Prune unused Docker data carrying the label name=$(MODULE);
    # -f skips the confirmation prompt.
    # The recipe line must start with a hard TAB.
    .PHONY: docker-clean
    docker-clean:
    	@docker system prune -f --filter "label=name=$(MODULE)"

MLRun

MLRun works together with tools like Nuclio (a serverless engine) and Kubeflow Pipelines to automate the MLOps process and bring CI/CD and Git practices to data science. It provides an SDK and a Kubernetes service.

single function

An example of an MLRun function is here: https://github.com/mlrun/mlrun/blob/development/examples/mlrun_export_import.ipynb. Functions can be run in the following modes: 1) as a local executable, 2) as an auto-scaling containerized microservice, 3) as an in-memory module.

complex pipeline

A complex pipeline can be managed with Kubeflow Pipelines, and the pipeline can be executed and tracked by MLRun. An example is here: https://github.com/mlrun/demo-github-actions