matrix-doc/scripts/speculator/main.go

551 lines
15 KiB
Go

// speculator allows you to preview pull requests to the matrix.org specification.
// It serves the following HTTP endpoints:
// - / lists open pull requests.
// - /spec/123 renders the spec as HTML at pull request 123.
// - /spec/head renders the spec as HTML at the head of origin/master.
// - /diff/rst/123 gives a diff of the spec's RST at pull request 123.
// - /diff/html/123 gives a diff of the spec's HTML at pull request 123.
// - /healthz responds with "ok".
// It is currently woefully inefficient, and there is a lot of low-hanging fruit for improvement.
package main
import (
"bytes"
"encoding/json"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"math/rand"
"net/http"
"net/url"
"os"
"os/exec"
"path"
"strconv"
"strings"
"sync"
"syscall"
"time"
"github.com/hashicorp/golang-lru"
)
// PullRequest holds the fields of a GitHub pull request that this tool decodes
// from the JSON returned by the GitHub pulls API.
type PullRequest struct {
// Number is the pull request number; it is used to build the spec and diff links.
Number int
// Base is the commit the pull request is diffed against.
Base Commit
// Head is the commit whose spec is rendered for the pull request.
Head Commit
// Title is shown in the pull request listing.
Title string
// User is the pull request author, checked against the trusted-member list.
User User
// HTMLURL is decoded from the "html_url" JSON key and linked from the listing.
HTMLURL string `json:"html_url"`
}
// Commit identifies one side (base or head) of a pull request.
type Commit struct {
// SHA is the commit hash that is checked out in order to generate the spec.
SHA string
// Repo is the repository information decoded alongside the commit.
Repo RequestRepo
}
// RequestRepo is the repository information attached to a Commit.
type RequestRepo struct {
// CloneURL is decoded from the "clone_url" JSON key.
CloneURL string `json:"clone_url"`
}
// User is the author of a pull request as decoded from the pull request JSON.
type User struct {
// Login is the account name that IsTrusted looks up in allowedMembers.
Login string
// HTMLURL is decoded from the "html_url" JSON key and linked from the listing.
HTMLURL string `json:"html_url"`
}
// Command-line flags and process-wide state shared by the handlers.
var (
port = flag.Int("port", 9000, "Port on which to listen for HTTP")
includesDir = flag.String("includes_dir", "", "Directory containing include files for styling like matrix.org")
// allowedMembers is the set of logins whose pull requests may be built; it is populated in main.
allowedMembers map[string]bool
// specCache and styledSpecCache are keyed by commit sha and hold the generated
// files for the unstyled and matrix.org-styled spec respectively.
specCache *lru.Cache // sha (string) -> map[string][]byte (filename -> contents)
styledSpecCache *lru.Cache // sha (string) -> map[string][]byte (filename -> contents)
)
// IsTrusted reports whether the user's login is marked as allowed in allowedMembers.
// Logins that are absent from the map are not trusted.
func (u *User) IsTrusted() bool {
	trusted, known := allowedMembers[u.Login]
	return known && trusted
}
const (
// pullsPrefix is the GitHub API URL listing matrix-doc pull requests; a pull
// request number is appended to it to fetch a single pull request.
pullsPrefix = "https://api.github.com/repos/matrix-org/matrix-doc/pulls"
// matrixDocCloneURL is the upstream repository that main clones at startup.
matrixDocCloneURL = "https://github.com/matrix-org/matrix-doc.git"
// permissionsOwnerFull (rwx for the owner only) is the mode used when creating clone directories.
permissionsOwnerFull = 0700
)
// gitClone clones url into a freshly created, randomly named directory under
// /tmp/matrix-doc and returns the path of that directory.
//
// If shared is true the clone is made with --shared, so it borrows objects from
// the source repository rather than copying them.
//
// On failure the returned path is empty and any directory that was created for
// the clone is removed again, so failed clones do not accumulate on disk.
func gitClone(url string, shared bool) (string, error) {
	directory := path.Join("/tmp/matrix-doc", strconv.FormatInt(rand.Int63(), 10))
	if err := os.MkdirAll(directory, permissionsOwnerFull); err != nil {
		return "", fmt.Errorf("error making directory %s: %v", directory, err)
	}

	// Options go before the positional arguments, which is the form git documents.
	args := []string{"clone"}
	if shared {
		args = append(args, "--shared")
	}
	args = append(args, url, directory)

	if err := runGitCommand(directory, args); err != nil {
		// Best-effort cleanup: the caller only receives "" and so cannot remove
		// the directory itself. A cleanup failure must not mask the clone error.
		_ = os.RemoveAll(directory)
		return "", err
	}
	return directory, nil
}
// gitCheckout checks out sha in the git working tree located at path.
func gitCheckout(path, sha string) error {
	checkoutArgs := []string{"checkout", sha}
	return runGitCommand(path, checkoutArgs)
}
// runGitCommand runs `git args...` with path as its working directory.
// When the command fails, the returned error contains the full command line,
// the underlying error and whatever git wrote to stderr.
func runGitCommand(path string, args []string) error {
	var stderr bytes.Buffer

	git := exec.Command("git", args...)
	git.Dir = path
	git.Stderr = &stderr

	runErr := git.Run()
	if runErr == nil {
		return nil
	}
	return fmt.Errorf("error running %q: %v (stderr: %s)", strings.Join(git.Args, " "), runErr, stderr.String())
}
// lookupPullRequest fetches the pull request named in url from the GitHub API.
//
// url.Path must look like pathPrefix + "/123[/...]"; the path segment directly
// after pathPrefix is taken as the pull request number and must be a decimal
// number. An error is returned if the path is malformed, if the request to
// GitHub fails or does not return 200 OK, or if the response cannot be decoded.
func lookupPullRequest(url url.URL, pathPrefix string) (*PullRequest, error) {
	if !strings.HasPrefix(url.Path, pathPrefix+"/") {
		return nil, fmt.Errorf("invalid path passed: %s expect %s/123", url.Path, pathPrefix)
	}

	prNumber := strings.Split(url.Path[len(pathPrefix)+1:], "/")[0]
	// Reject anything that is not a number before it is spliced into the API URL.
	if _, err := strconv.ParseUint(prNumber, 10, 64); err != nil {
		return nil, fmt.Errorf("invalid pull request number %q: expect %s/123", prNumber, pathPrefix)
	}

	resp, err := http.Get(fmt.Sprintf("%s/%s", pullsPrefix, prNumber))
	if err != nil {
		return nil, fmt.Errorf("error getting pulls: %v", err)
	}
	// Only defer the close once we know resp is non-nil.
	defer resp.Body.Close()

	// Error responses are JSON too, and would otherwise decode into an empty PullRequest.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("error getting pulls: unexpected status %s", resp.Status)
	}

	var pr PullRequest
	if err := json.NewDecoder(resp.Body).Decode(&pr); err != nil {
		return nil, fmt.Errorf("error decoding pulls: %v", err)
	}
	return &pr, nil
}
// generate runs `python gendoc.py --nodelete` inside dir/scripts.
// On failure the returned error includes everything gendoc wrote to stderr.
func generate(dir string) error {
	var stderr bytes.Buffer

	gendoc := exec.Command("python", "gendoc.py", "--nodelete")
	gendoc.Dir = path.Join(dir, "scripts")
	gendoc.Stderr = &stderr

	runErr := gendoc.Run()
	if runErr == nil {
		return nil
	}
	return fmt.Errorf("error generating spec: %v\nOutput from gendoc:\n%v", runErr, stderr.String())
}
// writeError sends err to the client as a plain-text response with the given
// HTTP status code. The body is the error text followed by a newline.
func writeError(w http.ResponseWriter, code int, err error) {
	body := fmt.Sprintf("%v\n", err)

	w.Header().Set("Content-Type", "text/plain")
	w.WriteHeader(code)
	io.WriteString(w, body)
}
// server holds the state shared by the HTTP handlers.
type server struct {
mu sync.Mutex // Must be locked around any git command on matrixDocCloneURL
// matrixDocCloneURL is used as the working directory for git commands and as
// the source of shared clones; main sets it to the directory returned by gitClone.
matrixDocCloneURL string
}
// updateBase runs `git fetch` in the master clone while holding s.mu.
func (s *server) updateBase() error {
	fetchArgs := []string{"fetch"}

	s.mu.Lock()
	defer s.mu.Unlock()
	return runGitCommand(s.matrixDocCloneURL, fetchArgs)
}
// canCheckout reports whether sha names a commit that is already present in the
// master clone at s.matrixDocCloneURL, i.e. whether it can be checked out
// without fetching first. It holds s.mu while asking git.
func (s *server) canCheckout(sha string) bool {
	commitRef := sha + "^{commit}"

	s.mu.Lock()
	defer s.mu.Unlock()
	err := runGitCommand(s.matrixDocCloneURL, []string{"cat-file", "-e", commitRef})
	return err == nil
}
// generateAt generates the spec from the repo at sha.
// It fetches first if sha is not yet known to the master clone, makes a shared
// clone, checks out sha in it and runs the generator there.
// Returns the path where the generation was done. The path may be non-empty
// even when err is non-nil (checkout or generation failed after a successful
// clone), so callers should clean it up regardless.
func (s *server) generateAt(sha string) (dst string, err error) {
	if !s.canCheckout(sha) {
		if err = s.updateBase(); err != nil {
			return "", err
		}
	}

	// Only the clone touches the master repository, so only it needs the lock.
	s.mu.Lock()
	dst, err = gitClone(s.matrixDocCloneURL, true)
	s.mu.Unlock()
	if err != nil {
		return dst, err
	}

	if err = gitCheckout(dst, sha); err != nil {
		return dst, err
	}
	return dst, generate(dst)
}
// getSHAOf resolves ref (e.g. "origin/master") to a commit sha by running
// `git rev-list ref -n1` in the master clone while holding s.mu.
// It returns the trimmed sha, or an empty string and an error describing the
// failed git invocation (including git's stderr).
func (s *server) getSHAOf(ref string) (string, error) {
	cmd := exec.Command("git", "rev-list", ref, "-n1")
	cmd.Dir = path.Join(s.matrixDocCloneURL)

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	// git reports failures on stderr, so capture it for the error message.
	cmd.Stderr = &stderr

	s.mu.Lock()
	err := cmd.Run()
	s.mu.Unlock()
	if err != nil {
		return "", fmt.Errorf("error running %q: %v (stderr: %s)", strings.Join(cmd.Args, " "), err, stderr.String())
	}
	return strings.TrimSpace(stdout.String()), nil
}
// extractPath works out which file within the gen directory a request refers to.
// It returns exactly one of (file to serve, path to redirect to); the other is "".
// path is the actual path being requested, e.g. "/spec/head/client_server.html".
// base is the base path of the handler, including a trailing slash, before the
// PR number, e.g. "/spec/".
func extractPath(path, base string) (string, string) {
	// The gen directory is assumed to be flat. Splitting the request path on "/"
	// must yield one part per slash in base, plus one for the PR number and one
	// for everything after the slash that follows the PR number.
	wantParts := strings.Count(base, "/") + 2
	segments := strings.SplitN(path, "/", wantParts)

	if len(segments) < wantParts {
		// base/pr with no trailing slash: send the client to base/pr/index.html.
		return "", path + "/index.html"
	}

	switch file := segments[wantParts-1]; file {
	case "":
		// base/pr/ serves the index page.
		return "index.html", ""
	default:
		// base/pr/file.html serves that file.
		return file, ""
	}
}
// serveSpec serves the generated spec for /spec/head/... (the head of
// origin/master) or /spec/123/... (the head commit of pull request 123).
//
// Generated files are cached per commit sha; the matrix.org-styled variant,
// selected with a non-empty "matrixdotorgstyle" query parameter, has its own
// cache. On a cache miss the spec is generated in a temporary clone, which is
// removed again before the handler returns.
func (s *server) serveSpec(w http.ResponseWriter, req *http.Request) {
	styleLikeMatrixDotOrg := req.URL.Query().Get("matrixdotorgstyle") != ""
	if styleLikeMatrixDotOrg && *includesDir == "" {
		writeError(w, 500, fmt.Errorf("Cannot style like matrix.org - no include dir specified"))
		return
	}

	var sha string
	if strings.HasPrefix(strings.ToLower(req.URL.Path), "/spec/head") {
		// err may be non-nil here but if headSha is non-empty we will serve a possibly-stale result in favour of erroring.
		// This is to deal with cases like where github is down but we still want to serve the spec.
		headSha, err := s.lookupHeadSHA()
		if headSha == "" {
			if err == nil {
				// Never report a bare "<nil>" to the client.
				err = fmt.Errorf("could not determine the sha of origin/master")
			}
			writeError(w, 500, err)
			return
		}
		sha = headSha
	} else {
		pr, err := lookupPullRequest(*req.URL, "/spec")
		if err != nil {
			writeError(w, 400, err)
			return
		}
		// We're going to run whatever Python is specified in the pull request, which
		// may do bad things, so only trust people we trust.
		if err := checkAuth(pr); err != nil {
			writeError(w, 403, err)
			return
		}
		sha = pr.Head.SHA
	}

	cache := specCache
	if styleLikeMatrixDotOrg {
		cache = styledSpecCache
	}

	var pathToContent map[string][]byte
	if cached, ok := cache.Get(sha); ok {
		pathToContent = cached.(map[string][]byte)
	} else {
		dst, err := s.generateAt(sha)
		// generateAt can return a directory together with an error, so the
		// cleanup is registered before the error is inspected.
		defer os.RemoveAll(dst)
		if err != nil {
			writeError(w, 500, err)
			return
		}

		if styleLikeMatrixDotOrg {
			cmd := exec.Command("./add-matrix-org-stylings.sh", *includesDir)
			cmd.Dir = path.Join(dst, "scripts")
			var b bytes.Buffer
			cmd.Stderr = &b
			if err := cmd.Run(); err != nil {
				writeError(w, 500, fmt.Errorf("error styling spec: %v\nOutput:\n%v", err, b.String()))
				return
			}
		}

		genDir := path.Join(dst, "scripts", "gen")
		fis, err := ioutil.ReadDir(genDir)
		if err != nil {
			writeError(w, 500, fmt.Errorf("Error reading directory: %v", err))
			// Stop here: carrying on would cache an empty result for this sha
			// and write a second response on top of the error.
			return
		}

		pathToContent = make(map[string][]byte, len(fis))
		for _, fi := range fis {
			b, err := ioutil.ReadFile(path.Join(genDir, fi.Name()))
			if err != nil {
				writeError(w, 500, fmt.Errorf("Error reading spec: %v", err))
				return
			}
			pathToContent[fi.Name()] = b
		}
		cache.Add(sha, pathToContent)
	}

	requestedPath, redirect := extractPath(req.URL.Path, "/spec/")
	if redirect != "" {
		s.redirectTo(w, req, redirect)
		return
	}

	if b, ok := pathToContent[requestedPath]; ok {
		w.Write(b)
		return
	}
	if requestedPath == "index.html" {
		// Fall back to single-page spec for old PRs
		if b, ok := pathToContent["specification.html"]; ok {
			w.Write(b)
			return
		}
	}

	w.WriteHeader(404)
	w.Write([]byte("Not found"))
}
// redirectTo answers with a 302 redirect to the request's own URL with its path
// replaced by path; the query string is carried over. Note that it overwrites
// req.URL.Path in place.
func (s *server) redirectTo(w http.ResponseWriter, req *http.Request, path string) {
	target := req.URL // same *url.URL as the request, so the request is modified too
	target.Path = path

	w.Header().Set("Location", target.String())
	w.WriteHeader(http.StatusFound)
}
// lookupHeadSHA looks up what origin/master's HEAD SHA is.
// It attempts to `git fetch` before doing so.
// If the fetch fails it still tries to resolve origin/master from the existing
// clone, so it may return a stale sha together with a non-nil error.
func (s *server) lookupHeadSHA() (sha string, retErr error) {
	if fetchErr := s.updateBase(); fetchErr != nil {
		log.Printf("Error fetching: %v, attempting to fall back to current known value", fetchErr)
		retErr = fetchErr
	}

	head, resolveErr := s.getSHAOf("origin/master")
	if resolveErr != nil {
		// A failure to resolve takes precedence over a failure to fetch.
		retErr = resolveErr
	}

	if head != "" && retErr != nil {
		log.Printf("Successfully fell back to possibly stale sha: %s", head)
	}
	return head, retErr
}
// checkAuth returns nil when the pull request's author is trusted, and an error
// naming the author otherwise.
func checkAuth(pr *PullRequest) error {
	if pr.User.IsTrusted() {
		return nil
	}
	return fmt.Errorf("%q is not a trusted pull requester", pr.User.Login)
}
// serveRSTDiff handles /diff/rst/123: it generates the spec at the base and head
// commits of pull request 123 and responds with a recursive unified diff of the
// two scripts/tmp directories.
func (s *server) serveRSTDiff(w http.ResponseWriter, req *http.Request) {
	pull, lookupErr := lookupPullRequest(*req.URL, "/diff/rst")
	if lookupErr != nil {
		writeError(w, http.StatusBadRequest, lookupErr)
		return
	}

	// We're going to run whatever Python is specified in the pull request, which
	// may do bad things, so only trust people we trust.
	if authErr := checkAuth(pull); authErr != nil {
		writeError(w, http.StatusForbidden, authErr)
		return
	}

	// Cleanup is registered before each error check because generateAt may
	// return a directory even when it fails.
	baseDir, baseErr := s.generateAt(pull.Base.SHA)
	defer os.RemoveAll(baseDir)
	if baseErr != nil {
		writeError(w, http.StatusInternalServerError, baseErr)
		return
	}

	headDir, headErr := s.generateAt(pull.Head.SHA)
	defer os.RemoveAll(headDir)
	if headErr != nil {
		writeError(w, http.StatusInternalServerError, headErr)
		return
	}

	var output bytes.Buffer
	differ := exec.Command("diff", "-r", "-u", path.Join(baseDir, "scripts", "tmp"), path.Join(headDir, "scripts", "tmp"))
	differ.Stdout = &output

	// diff exits with status 1 when the inputs differ, which is not a failure here.
	if diffErr := ignoreExitCodeOne(differ.Run()); diffErr != nil {
		writeError(w, http.StatusInternalServerError, fmt.Errorf("error running diff: %v", diffErr))
		return
	}
	w.Write(output.Bytes())
}
// serveHTMLDiff handles /diff/html/123/file.html: it generates the spec at the
// base and head commits of pull request 123 and responds with the output of
// htmldiff.pl run over the two versions of the requested file.
//
// Requests for /diff/html/123 are redirected to /diff/html/123/index.html.
// The redirect and the search for the differ happen before any spec is
// generated, so cheap failures do not pay for two full generations.
func (s *server) serveHTMLDiff(w http.ResponseWriter, req *http.Request) {
	pr, err := lookupPullRequest(*req.URL, "/diff/html")
	if err != nil {
		writeError(w, 400, err)
		return
	}

	// We're going to run whatever Python is specified in the pull request, which
	// may do bad things, so only trust people we trust.
	if err := checkAuth(pr); err != nil {
		writeError(w, 403, err)
		return
	}

	// The base passed here must be the path this handler is mounted on.
	requestedPath, redirect := extractPath(req.URL.Path, "/diff/html/")
	if redirect != "" {
		s.redirectTo(w, req, redirect)
		return
	}

	htmlDiffer, err := findHTMLDiffer()
	if err != nil {
		writeError(w, 500, fmt.Errorf("could not find HTML differ: %v", err))
		return
	}

	// Cleanup is registered before each error check because generateAt may
	// return a directory even when it fails.
	base, err := s.generateAt(pr.Base.SHA)
	defer os.RemoveAll(base)
	if err != nil {
		writeError(w, 500, err)
		return
	}

	head, err := s.generateAt(pr.Head.SHA)
	defer os.RemoveAll(head)
	if err != nil {
		writeError(w, 500, err)
		return
	}

	cmd := exec.Command(htmlDiffer, path.Join(base, "scripts", "gen", requestedPath), path.Join(head, "scripts", "gen", requestedPath))
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	// Keep the differ's diagnostics so a failure can be explained to the client.
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		writeError(w, 500, fmt.Errorf("error running HTML differ: %v (stderr: %s)", err, stderr.String()))
		return
	}
	w.Write(stdout.Bytes())
}
// findHTMLDiffer returns the path of htmldiff.pl in the current working
// directory, or an error if the working directory cannot be determined or the
// script cannot be stat'ed there.
func findHTMLDiffer() (string, error) {
	cwd, err := os.Getwd()
	if err != nil {
		return "", err
	}

	candidate := path.Join(cwd, "htmldiff.pl")
	if _, statErr := os.Stat(candidate); statErr != nil {
		return "", fmt.Errorf("unable to find htmldiff.pl")
	}
	return candidate, nil
}
// listPulls serves an HTML page listing the pull requests returned by the
// GitHub pulls API, with links to the rendered spec and the diffs for each one,
// followed by links to the spec at head.
//
// All text and URLs taken from the GitHub response are HTML-escaped before being
// written into the page, since pull request titles are arbitrary user input.
func listPulls(w http.ResponseWriter, req *http.Request) {
	resp, err := http.Get(pullsPrefix)
	if err != nil {
		writeError(w, 500, err)
		return
	}
	defer resp.Body.Close()

	var pulls []PullRequest
	if err := json.NewDecoder(resp.Body).Decode(&pulls); err != nil {
		writeError(w, 500, err)
		return
	}
	if len(pulls) == 0 {
		io.WriteString(w, "No pull requests found")
		return
	}

	// Escapes the same five characters as html.EscapeString, using a package
	// this file already imports.
	escape := strings.NewReplacer(
		"&", "&amp;",
		"<", "&lt;",
		">", "&gt;",
		`"`, "&#34;",
		"'", "&#39;",
	).Replace

	var page bytes.Buffer
	page.WriteString("<body><ul>")
	for _, pull := range pulls {
		fmt.Fprintf(&page, `<li>%d: <a href="%s">%s</a>: <a href="%s">%s</a>: <a href="spec/%d/">spec</a> <a href="diff/html/%d/">spec diff</a> <a href="diff/rst/%d/">rst diff</a></li>`,
			pull.Number, escape(pull.User.HTMLURL), escape(pull.User.Login), escape(pull.HTMLURL), escape(pull.Title), pull.Number, pull.Number, pull.Number)
	}
	page.WriteString(`</ul><div><a href="spec/head/">View the spec at head</a></div>`)
	if *includesDir != "" {
		page.WriteString(`<div><a href="spec/head/?matrixdotorgstyle=1">View the spec at head styled like matrix.org</a></div>`)
	}
	// Close the body exactly once, whether or not the styled link was added.
	page.WriteString("</body>")

	w.Write(page.Bytes())
}
// ignoreExitCodeOne turns the error of a command that exited with status 1 into
// nil and passes every other error (and nil) through unchanged.
func ignoreExitCodeOne(err error) error {
	exitErr, isExit := err.(*exec.ExitError)
	if !isExit {
		// Covers nil as well as errors that are not a process exit.
		return err
	}
	if status, isWait := exitErr.Sys().(syscall.WaitStatus); isWait && status.ExitStatus() == 1 {
		return nil
	}
	return err
}
// main parses the flags, sets up the trusted-member list and the caches, clones
// matrix-doc once to serve as the master clone, registers the HTTP handlers and
// then serves on the configured port until the server fails.
func main() {
	flag.Parse()

	// It would be great to read this from github, but there's no convenient way to do so.
	// Most of these memberships are "private", so would require some kind of auth.
	allowedMembers = map[string]bool{
		"dbkr":          true,
		"erikjohnston":  true,
		"illicitonion":  true,
		"Kegsay":        true,
		"NegativeMjark": true,
		"richvdh":       true,
		"leonerd":       true,
	}

	if cacheErr := initCache(); cacheErr != nil {
		log.Fatal(cacheErr)
	}

	// Seed before cloning: gitClone picks its directory name with math/rand.
	rand.Seed(time.Now().Unix())
	masterCloneDir, cloneErr := gitClone(matrixDocCloneURL, false)
	if cloneErr != nil {
		log.Fatal(cloneErr)
	}
	srv := server{matrixDocCloneURL: masterCloneDir}

	routes := []struct {
		pattern string
		handler func(http.ResponseWriter, *http.Request)
	}{
		{"/spec/", forceHTML(srv.serveSpec)},
		{"/diff/rst/", srv.serveRSTDiff},
		{"/diff/html/", forceHTML(srv.serveHTMLDiff)},
		{"/healthz", serveText("ok")},
		{"/", forceHTML(listPulls)},
	}
	for _, route := range routes {
		http.HandleFunc(route.pattern, route.handler)
	}

	fmt.Printf("Listening on port %d\n", *port)
	listenAddr := fmt.Sprintf(":%d", *port)
	log.Fatal(http.ListenAndServe(listenAddr, nil))
}
// forceHTML wraps h so that the Content-Type header is set to text/html before
// h runs. h may still overwrite the header before it writes the response.
func forceHTML(h func(w http.ResponseWriter, req *http.Request)) func(w http.ResponseWriter, req *http.Request) {
	wrapped := func(w http.ResponseWriter, req *http.Request) {
		headers := w.Header()
		headers.Set("Content-Type", "text/html")
		h(w, req)
	}
	return wrapped
}
// serveText returns a handler that answers every request by writing s as the
// response body.
func serveText(s string) func(http.ResponseWriter, *http.Request) {
	handler := func(w http.ResponseWriter, _ *http.Request) {
		io.WriteString(w, s)
	}
	return handler
}
// initCache creates the two LRU caches (plain and matrix.org-styled spec), each
// evicting after 50 entries (i.e. 50 sha1s).
// It returns the first error encountered; the package-level caches are only
// assigned once both have been created successfully.
func initCache() error {
	const maxCachedSHAs = 50

	plain, err := lru.New(maxCachedSHAs)
	if err != nil {
		return err
	}
	styled, err := lru.New(maxCachedSHAs)
	if err != nil {
		return err
	}

	specCache, styledSpecCache = plain, styled
	return nil
}