fix(http): times out on large repositories (#428)

Ayman Bagabas created

This was due to having a _set_ value of Read/Write http server timeout
values, and a faulty git gzip request handler. The server drops the
connection if there wasn't any read/write within 10 seconds.

Replace the read/write timeouts with idle timeout which will reset the
counter to _either_ read/write within 10 seconds. Idle timeout is only
used when keep-alive is enabled. That is the case by default.

Fix git by properly handling gzip and buffered git service responses.

Improve git http handler logging

Fixes: https://github.com/charmbracelet/soft-serve/issues/427

Change summary

pkg/git/service.go |  5 +++
pkg/web/context.go |  6 +++
pkg/web/git.go     | 62 ++++++++++++++++++++++++++---------------------
pkg/web/http.go    |  5 +--
pkg/web/logging.go |  5 +--
pkg/web/server.go  |  6 +++-
pkg/web/util.go    |  2 
7 files changed, 53 insertions(+), 38 deletions(-)

Detailed changes

pkg/git/service.go 🔗

@@ -149,6 +149,11 @@ func gitServiceHandler(ctx context.Context, svc Service, scmd ServiceCommand) er
 	if err != nil && errors.Is(err, os.ErrNotExist) {
 		return ErrInvalidRepo
 	} else if err != nil {
+		var exitErr *exec.ExitError
+		if errors.As(err, &exitErr) && len(exitErr.Stderr) > 0 {
+			return fmt.Errorf("%s: %s", exitErr, exitErr.Stderr)
+		}
+
 		return err
 	}
 

pkg/web/context.go 🔗

@@ -24,7 +24,11 @@ func NewContextHandler(ctx context.Context) func(http.Handler) http.Handler {
 			ctx := r.Context()
 			ctx = config.WithContext(ctx, cfg)
 			ctx = backend.WithContext(ctx, be)
-			ctx = log.WithContext(ctx, logger)
+			ctx = log.WithContext(ctx, logger.With(
+				"method", r.Method,
+				"path", r.URL,
+				"addr", r.RemoteAddr,
+			))
 			ctx = db.WithContext(ctx, dbx)
 			ctx = store.WithContext(ctx, datastore)
 			r = r.WithContext(ctx)

pkg/web/git.go 🔗

@@ -413,12 +413,16 @@ func serviceRpc(w http.ResponseWriter, r *http.Request) {
 		}...)
 	}
 
+	var (
+		err    error
+		reader io.ReadCloser
+	)
+
 	// Handle gzip encoding
-	reader := r.Body
-	defer reader.Close() // nolint: errcheck
+	reader = r.Body
 	switch r.Header.Get("Content-Encoding") {
 	case "gzip":
-		reader, err := gzip.NewReader(reader)
+		reader, err = gzip.NewReader(reader)
 		if err != nil {
 			logger.Errorf("failed to create gzip reader: %v", err)
 			renderInternalServerError(w, r)
@@ -428,49 +432,51 @@ func serviceRpc(w http.ResponseWriter, r *http.Request) {
 	}
 
 	cmd.Stdin = reader
+	cmd.Stdout = &flushResponseWriter{w}
 
 	if err := service.Handler(ctx, cmd); err != nil {
-		if errors.Is(err, git.ErrInvalidRepo) {
-			renderNotFound(w, r)
-			return
-		}
-		renderInternalServerError(w, r)
+		logger.Errorf("failed to handle service: %v", err)
 		return
 	}
 
-	// Handle buffered output
-	// Useful when using proxies
-
-	// We know that `w` is an `http.ResponseWriter`.
-	flusher, ok := w.(http.Flusher)
-	if !ok {
-		logger.Errorf("expected http.ResponseWriter to be an http.Flusher, got %T", w)
-		return
+	if service == git.ReceivePackService {
+		if err := git.EnsureDefaultBranch(ctx, cmd); err != nil {
+			logger.Errorf("failed to ensure default branch: %s", err)
+		}
 	}
+}
 
+// Handle buffered output
+// Useful when using proxies
+type flushResponseWriter struct {
+	http.ResponseWriter
+}
+
+func (f *flushResponseWriter) ReadFrom(r io.Reader) (int64, error) {
+	flusher := http.NewResponseController(f.ResponseWriter) // nolint: bodyclose
+
+	var n int64
 	p := make([]byte, 1024)
 	for {
-		nRead, err := stdout.Read(p)
+		nRead, err := r.Read(p)
 		if err == io.EOF {
 			break
 		}
-		nWrite, err := w.Write(p[:nRead])
+		nWrite, err := f.ResponseWriter.Write(p[:nRead])
 		if err != nil {
-			logger.Errorf("failed to write data: %v", err)
-			return
+			return n, err
 		}
 		if nRead != nWrite {
-			logger.Errorf("failed to write data: %d read, %d written", nRead, nWrite)
-			return
+			return n, err
 		}
-		flusher.Flush()
-	}
-
-	if service == git.ReceivePackService {
-		if err := git.EnsureDefaultBranch(ctx, cmd); err != nil {
-			logger.Errorf("failed to ensure default branch: %s", err)
+		n += int64(nRead)
+		// ResponseWriter must support http.Flusher to handle buffered output.
+		if err := flusher.Flush(); err != nil {
+			return n, fmt.Errorf("%w: error while flush", err)
 		}
 	}
+
+	return n, nil
 }
 
 func getInfoRefs(w http.ResponseWriter, r *http.Request) {

pkg/web/http.go 🔗

@@ -19,7 +19,7 @@ type HTTPServer struct {
 // NewHTTPServer creates a new HTTP server.
 func NewHTTPServer(ctx context.Context) (*HTTPServer, error) {
 	cfg := config.FromContext(ctx)
-	logger := log.FromContext(ctx).WithPrefix("http")
+	logger := log.FromContext(ctx)
 	s := &HTTPServer{
 		ctx: ctx,
 		cfg: cfg,
@@ -27,8 +27,7 @@ func NewHTTPServer(ctx context.Context) (*HTTPServer, error) {
 			Addr:              cfg.HTTP.ListenAddr,
 			Handler:           NewRouter(ctx),
 			ReadHeaderTimeout: time.Second * 10,
-			ReadTimeout:       time.Second * 10,
-			WriteTimeout:      time.Second * 10,
+			IdleTimeout:       time.Second * 10,
 			MaxHeaderBytes:    http.DefaultMaxHeaderBytes,
 			ErrorLog:          logger.StandardLog(log.StandardLogOptions{ForceLevel: log.ErrorLevel}),
 		},

pkg/web/logging.go 🔗

@@ -64,14 +64,13 @@ func (r *logWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
 }
 
 // NewLoggingMiddleware returns a new logging middleware.
-func NewLoggingMiddleware(next http.Handler) http.Handler {
+func NewLoggingMiddleware(next http.Handler, logger *log.Logger) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		logger := log.FromContext(r.Context())
 		start := time.Now()
 		writer := &logWriter{code: http.StatusOK, ResponseWriter: w}
 		logger.Debug("request",
 			"method", r.Method,
-			"uri", r.RequestURI,
+			"path", r.URL,
 			"addr", r.RemoteAddr)
 		next.ServeHTTP(writer, r)
 		elapsed := time.Since(start)

pkg/web/server.go 🔗

@@ -4,12 +4,14 @@ import (
 	"context"
 	"net/http"
 
+	"github.com/charmbracelet/log"
 	"github.com/gorilla/handlers"
 	"github.com/gorilla/mux"
 )
 
 // NewRouter returns a new HTTP router.
 func NewRouter(ctx context.Context) http.Handler {
+	logger := log.FromContext(ctx).WithPrefix("http")
 	router := mux.NewRouter()
 
 	// Git routes
@@ -19,10 +21,10 @@ func NewRouter(ctx context.Context) http.Handler {
 
 	// Context handler
 	// Adds context to the request
-	h := NewContextHandler(ctx)(router)
+	h := NewLoggingMiddleware(router, logger)
+	h = NewContextHandler(ctx)(h)
 	h = handlers.CompressHandler(h)
 	h = handlers.RecoveryHandler()(h)
-	h = NewLoggingMiddleware(h)
 
 	return h
 }

pkg/web/util.go 🔗

@@ -8,7 +8,7 @@ import (
 
 func renderStatus(code int) http.HandlerFunc {
 	return func(w http.ResponseWriter, _ *http.Request) {
-		w.WriteHeader(code)
 		io.WriteString(w, fmt.Sprintf("%d %s", code, http.StatusText(code))) // nolint: errcheck
+		w.WriteHeader(code)
 	}
 }