dl.google.com: Powered by Go

26 July 2013

Brad Fitzpatrick

Gopher, Google

Overview / tl;dw ("too long; didn't watch"):

me

I love Go

dl.google.com

Why port?

reason 0

$ apt-get update

Yeah, embarrassing, for years...

... which led to:

How hard can this be?

dl.google.com: a few tricks

each "payload" (~URL) described by a protobuf:

dl.google.com: how it was

Aside: Why good code goes bad

code complexity

changing environment

so why did it suck?

but why?

Old code

Mitigation solution?

Summary of 5-year-old code in 2012

Environment changes

Copying N bytes from A to B in event-loop environments (node.js, this C++, etc.)

Thought that sucked? Try to mix in other state / logic, and then write it in C++.

Or in JavaScript...

Copying N bytes from A to B in Go:

    n, err := io.Copy(dst, src)
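
For completeness, a minimal runnable version of the same idea (the data here is illustrative):

package main

import (
    "io"
    "log"
    "os"
    "strings"
)

func main() {
    src := strings.NewReader("N bytes of data\n")
    // io.Copy blocks this goroutine, not an OS thread;
    // the runtime multiplexes the waiting for us.
    n, err := io.Copy(os.Stdout, src)
    if err != nil {
        log.Fatal(err)
    }
    log.Printf("copied %d bytes", n)
}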

Where to start?

Notable stages

Using Go's Standard Library

Go's Standard Library

Hello World

package main

import (
    "fmt"
    "log"
    "net/http"
    "os"
)

func handler(w http.ResponseWriter, r *http.Request) {
    fmt.Fprintf(os.Stdout, "%s details: %+v\n", r.URL.Path, r)
    fmt.Fprintf(w, "Hello, world! at %s\n", r.URL.Path)
}

func main() {
    log.Printf("Running...")
    log.Fatal(http.ListenAndServe("127.0.0.1:8080", http.HandlerFunc(handler)))
}
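
Run it, and any requested path is echoed back:

    $ curl http://127.0.0.1:8080/foo
    Hello, world! at /foo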

File Server

package main

import (
    "log"
    "net/http"
    "os"
    "path/filepath"
)

func main() {
    log.Printf("Running...")
    log.Fatal(http.ListenAndServe(
        "127.0.0.1:8080",
        http.FileServer(http.Dir(
            filepath.Join(os.Getenv("HOME"), "go", "doc")))))
}
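
http.FileServer is not just a toy: it serves directory listings and, through the same machinery as http.ServeContent below, handles conditional (If-Modified-Since) and Range requests for free. Assuming a Go tree checked out at $HOME/go:

    $ curl http://127.0.0.1:8080/    # directory listing of $HOME/go/doc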

http.ServeContent

io.Reader, io.Seeker
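
For reference, these are the two small standard-library interfaces ServeContent requires of its content argument (an io.ReadSeeker is both):

type Reader interface {
    Read(p []byte) (n int, err error)
}

type Seeker interface {
    Seek(offset int64, whence int) (int64, error)
}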

$ curl -H "Range: bytes=5-" http://localhost:8080

package main

import (
    "log"
    "net/http"
    "strings"
    "time"
)

func main() {
    log.Printf("Running...")
    err := http.ListenAndServe("127.0.0.1:8080", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        http.ServeContent(w, r, "foo.txt", time.Now(),
            strings.NewReader("I am some content.\n"))
    }))
    log.Fatal(err)
}
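
Given the 19-byte body above, the earlier Range request returns just the tail (a 206 Partial Content):

    $ curl -H "Range: bytes=5-" http://localhost:8080
    some content.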

groupcache

Using groupcache

Declare who you are and who your peers are.

    me := "http://10.0.0.1"
    peers := groupcache.NewHTTPPool(me)

    // Whenever peers change:
    peers.Set("http://10.0.0.1", "http://10.0.0.2", "http://10.0.0.3")

This peer interface is pluggable. (e.g. inside Google it's automatic.)

Using groupcache

Declare a group. (A group of keys, shared between a group of peers.)

    var thumbNails = groupcache.NewGroup("thumbnail", 64<<20, groupcache.GetterFunc(
        func(ctx groupcache.Context, key string, dest groupcache.Sink) error {
            fileName := key
            dest.SetBytes(generateThumbnail(fileName))
            return nil
        }))

Using groupcache

Request keys

    var data []byte
    err := thumbNails.Get(ctx, "big-file.jpg",
        groupcache.AllocatingByteSliceSink(&data))
    // ...
    http.ServeContent(w, r, "big-file-thumb.jpg", modTime, bytes.NewReader(data))

dl.google.com and groupcache

dl.google.com interface composition

// A SizeReaderAt is a ReaderAt with a Size method.
//
// An io.SectionReader implements SizeReaderAt.
type SizeReaderAt interface {
    Size() int64
    io.ReaderAt
}

// NewMultiReaderAt is like io.MultiReader but produces a ReaderAt
// (and Size), instead of just a reader.
func NewMultiReaderAt(parts ...SizeReaderAt) SizeReaderAt {
    m := &multi{
        parts: make([]offsetAndSource, 0, len(parts)),
    }
    var off int64
    for _, p := range parts {
        m.parts = append(m.parts, offsetAndSource{off, p})
        off += p.Size()
    }
    m.size = off
    return m
}

io.SectionReader

chunk-aligned ReaderAt

// NewChunkAlignedReaderAt returns a ReaderAt wrapper that is backed
// by a ReaderAt r of size totalSize where the wrapper guarantees that
// all ReadAt calls are aligned to chunkSize boundaries and of size
// chunkSize (except for the final chunk, which may be shorter).
//
// A chunk-aligned reader is good for caching: it lets upper layers use
// any access pattern while guaranteeing that the wrapped ReaderAt sees
// only nicely-cacheable access patterns & sizes.
func NewChunkAlignedReaderAt(r SizeReaderAt, chunkSize int) SizeReaderAt {
    // ...
}
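
The body is elided in the talk. Purely as an illustration of the idea (not the talk's actual code, and assuming the SizeReaderAt type above), a naive, cache-free wrapper might look like this; a real one is the natural place to hook in groupcache:

type chunkAligned struct {
    r         SizeReaderAt
    chunkSize int
}

func (c *chunkAligned) Size() int64 { return c.r.Size() }

func (c *chunkAligned) ReadAt(p []byte, off int64) (n int, err error) {
    if off < 0 || off >= c.Size() {
        return 0, io.EOF
    }
    buf := make([]byte, c.chunkSize)
    for len(p) > 0 && off < c.Size() {
        // Round the requested offset down to a chunk boundary.
        chunkOff := off - off%int64(c.chunkSize)
        chunk := buf
        if rem := c.Size() - chunkOff; rem < int64(c.chunkSize) {
            chunk = buf[:rem] // final chunk may be shorter
        }
        // The wrapped ReaderAt only ever sees aligned, chunk-sized reads.
        rn, rerr := c.r.ReadAt(chunk, chunkOff)
        if rn < len(chunk) {
            if rerr == nil {
                rerr = io.ErrUnexpectedEOF
            }
            return n, rerr
        }
        cn := copy(p, chunk[off-chunkOff:])
        n += cn
        p = p[cn:]
        off += int64(cn)
    }
    if len(p) > 0 {
        err = io.EOF
    }
    return n, err
}

The constructor NewChunkAlignedReaderAt would then simply return &chunkAligned{r, chunkSize}.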

Composing all this


package main

import (
	"io"
	"log"
	"net/http"
	"sort"
	"strings"
	"time"
)

var modTime = time.Unix(1374708739, 0)

func part(s string) SizeReaderAt {
	return io.NewSectionReader(strings.NewReader(s), 0, int64(len(s)))
}

func handler(w http.ResponseWriter, r *http.Request) {
	sra := NewMultiReaderAt(
		part("Hello, "), part(" world! "),
		part("You requested "+r.URL.Path+"\n"),
	)
	rs := io.NewSectionReader(sra, 0, sra.Size())
	http.ServeContent(w, r, "foo.txt", modTime, rs)
}

func main() {
	log.Printf("Running...")
	http.HandleFunc("/", handler)
	log.Fatal(http.ListenAndServe("127.0.0.1:8080", nil))
}

// (SizeReaderAt and NewMultiReaderAt are as defined earlier.)

type offsetAndSource struct {
	off int64
	SizeReaderAt
}

type multi struct {
	parts []offsetAndSource
	size  int64
}

func (m *multi) Size() int64 { return m.size }

func (m *multi) ReadAt(p []byte, off int64) (n int, err error) {
	wantN := len(p)

	// Skip past the requested offset.
	skipParts := sort.Search(len(m.parts), func(i int) bool {
		// This function returns whether parts[i] will
		// contribute any bytes to our output.
		part := m.parts[i]
		return part.off+part.Size() > off
	})
	parts := m.parts[skipParts:]

	// How far to skip in the first part.
	needSkip := off
	if len(parts) > 0 {
		needSkip -= parts[0].off
	}

	for len(parts) > 0 && len(p) > 0 {
		readP := p
		partSize := parts[0].Size()
		if int64(len(readP)) > partSize-needSkip {
			readP = readP[:partSize-needSkip]
		}
		pn, err0 := parts[0].ReadAt(readP, needSkip)
		if err0 != nil {
			return n, err0
		}
		n += pn
		p = p[pn:]
		if int64(pn)+needSkip == partSize {
			parts = parts[1:]
		}
		needSkip = 0
	}

	if n != wantN {
		err = io.ErrUnexpectedEOF
	}
	return
}

Things we get for free from net/http

Overall simplification

From this...

... to this.

And from pages and pages of this...

... to this

So how does it compare to C++?

Could we have just rewritten it in new C++?

Could I have just fixed the bugs in the C++ version?

How much of dl.google.com is closed-source?

Thank you