dl.google.com: Powered by Go

26 July 2013

Brad Fitzpatrick

Gopher, Google

Overview / tl;dw:

too long...

me

I love Go

dl.google.com

dl.google.com

Why port?

reason 0

$ apt-get update

Yeah, embarrassing, for years...

... which led to:

How hard can this be?

dl.google.com: few tricks

each "payload" (~URL) described by a protobuf:

dl.google.com: how it was

Aside: Why good code goes bad

Why good code goes bad

code complexity

changing environment

so why did it suck?

but why?

Old code

Mitigation solution?

Summary of 5-year old code in 2012

Environment changes

Copying N bytes from A to B in event loop environments (node.js, this C++, etc)

Thought that sucked? Try to mix in other state / logic, and then write it in C++.

Or in JavaScript...

Copying N bytes from A to B in Go:

    n, err := io.Copy(dst, src)

Where to start?

Notable stages

Notable stages

Using Go's Standard Library

Using Go's Standard Library

Go's Standard Library

Hello World

package main

import (
    "fmt"
    "log"
    "net/http"
    "os"
)

func handler(w http.ResponseWriter, r *http.Request) {
    fmt.Fprintf(os.Stdout, "%s details: %+v\n", r.URL.Path, r)
    fmt.Fprintf(w, "Hello, world! at %s\n", r.URL.Path)
}

// main serves every request on localhost:8080 with handler.
// ListenAndServe blocks until the server fails; the error is fatal.
func main() {
    log.Printf("Running...")
    err := http.ListenAndServe("127.0.0.1:8080", http.HandlerFunc(handler))
    log.Fatal(err)
}

File Server

package main

import (
    "log"
    "net/http"
    "os"
    "path/filepath"
)

// main serves the Go doc tree under $HOME/go/doc as static files on
// localhost:8080. ListenAndServe blocks; any server error is fatal.
func main() {
    log.Printf("Running...")
    docRoot := filepath.Join(os.Getenv("HOME"), "go", "doc")
    err := http.ListenAndServe("127.0.0.1:8080", http.FileServer(http.Dir(docRoot)))
    log.Fatal(err)
}

http.ServeContent

io.Reader, io.Seeker

http.ServeContent

$ curl -H "Range: bytes=5-" http://localhost:8080

package main

import (
    "log"
    "net/http"
    "strings"
    "time"
)

// main serves a fixed piece of text via http.ServeContent on
// localhost:8080, which gives us Range-request and conditional-GET
// handling for free. A fresh reader is built per request so each
// request gets its own seek position.
func main() {
    log.Printf("Running...")
    serve := func(w http.ResponseWriter, r *http.Request) {
        http.ServeContent(w, r, "foo.txt", time.Now(),
            strings.NewReader("I am some content.\n"))
    }
    log.Fatal(http.ListenAndServe("127.0.0.1:8080", http.HandlerFunc(serve)))
}

groupcache

groupcache

Using groupcache

Declare who you are and who your peers are.

    me := "http://10.0.0.1"
    peers := groupcache.NewHTTPPool(me)

    // Whenever peers change:
    peers.Set("http://10.0.0.1", "http://10.0.0.2", "http://10.0.0.3")

This peer interface is pluggable. (e.g. inside Google it's automatic.)

Using groupcache

Declare a group. (group of keys, shared between group of peers)

    var thumbNails = groupcache.NewGroup("thumbnail", 64<<20, groupcache.GetterFunc(
        func(ctx groupcache.Context, key string, dest groupcache.Sink) error {
            fileName := key
            dest.SetBytes(generateThumbnail(fileName))
            return nil
        }))

Using groupcache

Request keys

    var data []byte
    err := thumbNails.Get(ctx, "big-file.jpg",
        groupcache.AllocatingByteSliceSink(&data))
    // ...
    http.ServeContent(w, r, "big-file-thumb.jpg", modTime, bytes.NewReader(data))

dl.google.com and groupcache

dl.google.com interface composition

// A SizeReaderAt is a ReaderAt with a Size method.
//
// An io.SectionReader implements SizeReaderAt.
type SizeReaderAt interface {
    Size() int64
    io.ReaderAt
}

// NewMultiReaderAt is like io.MultiReader but produces a ReaderAt
// (and Size), instead of just a reader.
func NewMultiReaderAt(parts ...SizeReaderAt) SizeReaderAt {
    m := &multi{
        parts: make([]offsetAndSource, 0, len(parts)),
    }
    var off int64
    for _, p := range parts {
        // Record the absolute offset at which each part begins in
        // the logical concatenation of all parts.
        m.parts = append(m.parts, offsetAndSource{off, p})
        off += p.Size()
    }
    // The total size is the sum of all part sizes.
    m.size = off
    return m
}

io.SectionReader

chunk-aligned ReaderAt

// NewChunkAlignedReaderAt returns a ReaderAt wrapper that is backed
// by the ReaderAt r (whose total size is r.Size()) where the wrapper
// guarantees that all ReadAt calls are aligned to chunkSize boundaries
// and of size chunkSize (except for the final chunk, which may be shorter).
//
// A chunk-aligned reader is good for caching, letting upper layers have
// any access pattern, but guarantees that the wrapped ReaderAt sees
// only nicely-cacheable access patterns & sizes.
func NewChunkAlignedReaderAt(r SizeReaderAt, chunkSize int) SizeReaderAt {
    // ... (implementation elided on this slide)
}

Composing all this

// +build ignore,OMIT

package main

import (
	"io"
	"log"
	"net/http"
	"sort"
	"strings"
	"time"
)

// modTime is a fixed modification time (a constant date in July 2013,
// presumably around the talk date) passed to http.ServeContent so
// Last-Modified / If-Modified-Since handling is deterministic.
var modTime = time.Unix(1374708739, 0)

// part wraps the string s in a SizeReaderAt covering the whole string.
func part(s string) SizeReaderAt {
    size := int64(len(s))
    return io.NewSectionReader(strings.NewReader(s), 0, size)
}

// handler composes three string parts into one logical SizeReaderAt,
// wraps it in a SectionReader (giving Read+Seek over the whole span),
// and serves it via http.ServeContent for free Range-request support.
func handler(w http.ResponseWriter, r *http.Request) {
    sra := NewMultiReaderAt(
        part("Hello, "),
        part(" world! "),
        part("You requested "+r.URL.Path+"\n"),
    )
    readSeeker := io.NewSectionReader(sra, 0, sra.Size())
    http.ServeContent(w, r, "foo.txt", modTime, readSeeker)
}

// main registers handler on the default mux and serves on
// localhost:8080 until the server fails.
func main() {
	const addr = "127.0.0.1:8080"
	log.Printf("Running...")
	http.HandleFunc("/", handler)
	if err := http.ListenAndServe(addr, nil); err != nil {
		log.Fatal(err)
	}
}

// START_1 OMIT
// A SizeReaderAt is a ReaderAt with a Size method.
//
// An io.SectionReader implements SizeReaderAt.
type SizeReaderAt interface {
	Size() int64
	io.ReaderAt
}

// NewMultiReaderAt is like io.MultiReader but produces a ReaderAt
// (and Size), instead of just a reader.
func NewMultiReaderAt(parts ...SizeReaderAt) SizeReaderAt {
	m := &multi{
		parts: make([]offsetAndSource, 0, len(parts)),
	}
	var off int64
	for _, p := range parts {
		m.parts = append(m.parts, offsetAndSource{off, p})
		off += p.Size()
	}
	m.size = off
	return m
}

// END_1 OMIT

type offsetAndSource struct {
	off int64
	SizeReaderAt
}

type multi struct {
	parts []offsetAndSource
	size  int64
}

func (m *multi) Size() int64 { return m.size }

func (m *multi) ReadAt(p []byte, off int64) (n int, err error) {
	wantN := len(p)

	// Skip past the requested offset.
	skipParts := sort.Search(len(m.parts), func(i int) bool {
		// This function returns whether parts[i] will
		// contribute any bytes to our output.
		part := m.parts[i]
		return part.off+part.Size() > off
	})
	parts := m.parts[skipParts:]

	// How far to skip in the first part.
	needSkip := off
	if len(parts) > 0 {
		needSkip -= parts[0].off
	}

	for len(parts) > 0 && len(p) > 0 {
		readP := p
		partSize := parts[0].Size()
		if int64(len(readP)) > partSize-needSkip {
			readP = readP[:partSize-needSkip]
		}
		pn, err0 := parts[0].ReadAt(readP, needSkip)
		if err0 != nil {
			return n, err0
		}
		n += pn
		p = p[pn:]
		if int64(pn)+needSkip == partSize {
			parts = parts[1:]
		}
		needSkip = 0
	}

	if n != wantN {
		err = io.ErrUnexpectedEOF
	}
	return
}

Things we get for free from net/http

Overall simplification

From this...

... to this.

And from page and pages of this...

... to this

So how does it compare to C++?

Could we have just rewritten it in new C++?

Could I have just fixed the bugs in the C++ version?

How much of dl.google.com is closed-source?

Thank you

Use the left and right arrow keys or click the left and right edges of the page to navigate between slides.
(Press 'H' or navigate to hide this message.)