fix hard-coded timeout and error panic in API archive download endpoint (#20925)

petergardfjall · web-flow · commit 4562d40fcead · 2022-08-29T11:45:20.000+02:00
* fix hard-coded timeout and error panic in API archive download endpoint

This commit updates the `GET /api/v1/repos/{owner}/{repo}/archive/{archive}`
endpoint which prior to this PR had a couple of issues.

1. The endpoint had a hard-coded 20s timeout for the archiver to complete after
   which a 500 (Internal Server Error) was returned to the client. For a scripted
   API client there was no clear way of telling that the operation timed out and
   that it should retry.

2. Whenever the timeout _did occur_, the code used to panic. This was caused by
   the API endpoint "delegating" to the same call path as the web, which uses a
   slightly different way of reporting errors (HTML rather than JSON for
   example).

   More specifically, `api/v1/repo/file.go#GetArchive` just called through to
   `web/repo/repo.go#Download`, which expects the `Context` to have its `Render`
   field set. That field is `nil` for API calls, hence the `nil` pointer panic.

The code addresses (1) by dropping the hard-coded timeout. Instead, any
timeout/cancelation on the incoming `Context` is used.

The code addresses (2) by updating the API endpoint to use a separate call path
for the API-triggered archive download. This avoids producing HTML error pages
on failure; the endpoint now reports errors as JSON.

Signed-off-by: Peter Gardfjäll <peter.gardfjall.work@gmail.com>
diff --git a/cmd/migrate_storage.go b/cmd/migrate_storage.go
@@ -112,11 +112,8 @@ func migrateRepoAvatars(ctx context.Context, dstStorage storage.ObjectStorage) e
 
 func migrateRepoArchivers(ctx context.Context, dstStorage storage.ObjectStorage) error {
 	return db.IterateObjects(ctx, func(archiver *repo_model.RepoArchiver) error {
-		p, err := archiver.RelativePath()
-		if err != nil {
-			return err
-		}
-		_, err = storage.Copy(dstStorage, p, storage.RepoArchives, p)
+		p := archiver.RelativePath()
+		_, err := storage.Copy(dstStorage, p, storage.RepoArchives, p)
 		return err
 	})
 }
diff --git a/models/repo.go b/models/repo.go
@@ -218,8 +218,7 @@ func DeleteRepository(doer *user_model.User, uid, repoID int64) error {
 
 	archivePaths := make([]string, 0, len(archives))
 	for _, v := range archives {
-		p, _ := v.RelativePath()
-		archivePaths = append(archivePaths, p)
+		archivePaths = append(archivePaths, v.RelativePath())
 	}
 
 	if _, err := db.DeleteByBean(ctx, &repo_model.RepoArchiver{RepoID: repoID}); err != nil {
diff --git a/models/repo/archiver.go b/models/repo/archiver.go
@@ -39,9 +39,9 @@ func init() {
 	db.RegisterModel(new(RepoArchiver))
 }
 
-// RelativePath returns relative path
-func (archiver *RepoArchiver) RelativePath() (string, error) {
-	return fmt.Sprintf("%d/%s/%s.%s", archiver.RepoID, archiver.CommitID[:2], archiver.CommitID, archiver.Type.String()), nil
+// RelativePath returns the archive path relative to the archive storage root.
+func (archiver *RepoArchiver) RelativePath() string {
+	return fmt.Sprintf("%d/%s/%s.%s", archiver.RepoID, archiver.CommitID[:2], archiver.CommitID, archiver.Type.String())
 }
 
 var delRepoArchiver = new(RepoArchiver)
diff --git a/routers/api/v1/repo/file.go b/routers/api/v1/repo/file.go
@@ -8,6 +8,7 @@ package repo
 import (
 	"bytes"
 	"encoding/base64"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
@@ -28,7 +29,7 @@ import (
 	api "code.gitea.io/gitea/modules/structs"
 	"code.gitea.io/gitea/modules/web"
 	"code.gitea.io/gitea/routers/common"
-	"code.gitea.io/gitea/routers/web/repo"
+	archiver_service "code.gitea.io/gitea/services/repository/archiver"
 	files_service "code.gitea.io/gitea/services/repository/files"
 )
 
@@ -294,7 +295,53 @@ func GetArchive(ctx *context.APIContext) {
 		defer gitRepo.Close()
 	}
 
-	repo.Download(ctx.Context)
+	archiveDownload(ctx)
+}
+
+func archiveDownload(ctx *context.APIContext) {
+	uri := ctx.Params("*")
+	aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, uri)
+	if err != nil {
+		if errors.Is(err, archiver_service.ErrUnknownArchiveFormat{}) {
+			ctx.Error(http.StatusBadRequest, "unknown archive format", err)
+		} else if errors.Is(err, archiver_service.RepoRefNotFoundError{}) {
+			ctx.Error(http.StatusNotFound, "unrecognized reference", err)
+		} else {
+			ctx.ServerError("archiver_service.NewRequest", err)
+		}
+		return
+	}
+
+	archiver, err := aReq.Await(ctx)
+	if err != nil {
+		ctx.ServerError("archiver.Await", err)
+		return
+	}
+
+	download(ctx, aReq.GetArchiveName(), archiver)
+}
+
+func download(ctx *context.APIContext, archiveName string, archiver *repo_model.RepoArchiver) {
+	downloadName := ctx.Repo.Repository.Name + "-" + archiveName
+
+	rPath := archiver.RelativePath()
+	if setting.RepoArchive.ServeDirect {
+		// If we have a signed url (S3, object storage), redirect to this directly.
+		u, err := storage.RepoArchives.URL(rPath, downloadName)
+		if u != nil && err == nil {
+			ctx.Redirect(u.String())
+			return
+		}
+	}
+
+	// If we have matched and access to release or issue
+	fr, err := storage.RepoArchives.Open(rPath)
+	if err != nil {
+		ctx.ServerError("Open", err)
+		return
+	}
+	defer fr.Close()
+	ctx.ServeContent(downloadName, fr, archiver.CreatedUnix.AsLocalTime())
 }
 
 // GetEditorconfig get editor config of a repository
diff --git a/routers/web/repo/repo.go b/routers/web/repo/repo.go
@@ -10,7 +10,6 @@ import (
 	"fmt"
 	"net/http"
 	"strings"
-	"time"
 
 	"code.gitea.io/gitea/models"
 	"code.gitea.io/gitea/models/db"
@@ -22,7 +21,6 @@ import (
 	"code.gitea.io/gitea/modules/base"
 	"code.gitea.io/gitea/modules/context"
 	"code.gitea.io/gitea/modules/convert"
-	"code.gitea.io/gitea/modules/graceful"
 	"code.gitea.io/gitea/modules/log"
 	repo_module "code.gitea.io/gitea/modules/repository"
 	"code.gitea.io/gitea/modules/setting"
@@ -390,68 +388,27 @@ func Download(ctx *context.Context) {
 	if err != nil {
 		if errors.Is(err, archiver_service.ErrUnknownArchiveFormat{}) {
 			ctx.Error(http.StatusBadRequest, err.Error())
+		} else if errors.Is(err, archiver_service.RepoRefNotFoundError{}) {
+			ctx.Error(http.StatusNotFound, err.Error())
 		} else {
 			ctx.ServerError("archiver_service.NewRequest", err)
 		}
 		return
 	}
-	if aReq == nil {
-		ctx.Error(http.StatusNotFound)
-		return
-	}
 
-	archiver, err := repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID)
+	archiver, err := aReq.Await(ctx)
 	if err != nil {
-		ctx.ServerError("models.GetRepoArchiver", err)
+		ctx.ServerError("archiver.Await", err)
 		return
 	}
-	if archiver != nil && archiver.Status == repo_model.ArchiverReady {
-		download(ctx, aReq.GetArchiveName(), archiver)
-		return
-	}
-
-	if err := archiver_service.StartArchive(aReq); err != nil {
-		ctx.ServerError("archiver_service.StartArchive", err)
-		return
-	}
-
-	var times int
-	t := time.NewTicker(time.Second * 1)
-	defer t.Stop()
 
-	for {
-		select {
-		case <-graceful.GetManager().HammerContext().Done():
-			log.Warn("exit archive download because system stop")
-			return
-		case <-t.C:
-			if times > 20 {
-				ctx.ServerError("wait download timeout", nil)
-				return
-			}
-			times++
-			archiver, err = repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID)
-			if err != nil {
-				ctx.ServerError("archiver_service.StartArchive", err)
-				return
-			}
-			if archiver != nil && archiver.Status == repo_model.ArchiverReady {
-				download(ctx, aReq.GetArchiveName(), archiver)
-				return
-			}
-		}
-	}
+	download(ctx, aReq.GetArchiveName(), archiver)
 }
 
 func download(ctx *context.Context, archiveName string, archiver *repo_model.RepoArchiver) {
 	downloadName := ctx.Repo.Repository.Name + "-" + archiveName
 
-	rPath, err := archiver.RelativePath()
-	if err != nil {
-		ctx.ServerError("archiver.RelativePath", err)
-		return
-	}
-
+	rPath := archiver.RelativePath()
 	if setting.RepoArchive.ServeDirect {
 		// If we have a signed url (S3, object storage), redirect to this directly.
 		u, err := storage.RepoArchives.URL(rPath, downloadName)
diff --git a/services/repository/archiver/archiver.go b/services/repository/archiver/archiver.go
@@ -57,6 +57,21 @@ func (ErrUnknownArchiveFormat) Is(err error) bool {
 	return ok
 }
 
+// RepoRefNotFoundError is returned when a requested reference (commit, tag) was not found.
+type RepoRefNotFoundError struct {
+	RefName string
+}
+
+// Error implements error.
+func (e RepoRefNotFoundError) Error() string {
+	return fmt.Sprintf("unrecognized repository reference: %s", e.RefName)
+}
+
+func (e RepoRefNotFoundError) Is(err error) bool {
+	_, ok := err.(RepoRefNotFoundError)
+	return ok
+}
+
 // NewRequest creates an archival request, based on the URI.  The
 // resulting ArchiveRequest is suitable for being passed to ArchiveRepository()
 // if it's determined that the request still needs to be satisfied.
@@ -103,7 +118,7 @@ func NewRequest(repoID int64, repo *git.Repository, uri string) (*ArchiveRequest
 			}
 		}
 	} else {
-		return nil, fmt.Errorf("Unknow ref %s type", r.refName)
+		return nil, RepoRefNotFoundError{RefName: r.refName}
 	}
 
 	return r, nil
@@ -115,6 +130,49 @@ func (aReq *ArchiveRequest) GetArchiveName() string {
 	return strings.ReplaceAll(aReq.refName, "/", "-") + "." + aReq.Type.String()
 }
 
+// Await awaits the completion of an ArchiveRequest. If the archive has
+// already been prepared the method returns immediately. Otherwise an archiver
+// process will be started and its completion awaited. On success the returned
+// RepoArchiver may be used to download the archive. Note that even if the
+// context is cancelled/times out a started archiver will still continue to run
+// in the background.
+func (aReq *ArchiveRequest) Await(ctx context.Context) (*repo_model.RepoArchiver, error) {
+	archiver, err := repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID)
+	if err != nil {
+		return nil, fmt.Errorf("models.GetRepoArchiver: %v", err)
+	}
+
+	if archiver != nil && archiver.Status == repo_model.ArchiverReady {
+		// Archive already generated, we're done.
+		return archiver, nil
+	}
+
+	if err := StartArchive(aReq); err != nil {
+		return nil, fmt.Errorf("archiver.StartArchive: %v", err)
+	}
+
+	poll := time.NewTicker(time.Second * 1)
+	defer poll.Stop()
+
+	for {
+		select {
+		case <-graceful.GetManager().HammerContext().Done():
+			// System stopped.
+			return nil, graceful.GetManager().HammerContext().Err()
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		case <-poll.C:
+			archiver, err = repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID)
+			if err != nil {
+				return nil, fmt.Errorf("repo_model.GetRepoArchiver: %v", err)
+			}
+			if archiver != nil && archiver.Status == repo_model.ArchiverReady {
+				return archiver, nil
+			}
+		}
+	}
+}
+
 func doArchive(r *ArchiveRequest) (*repo_model.RepoArchiver, error) {
 	txCtx, committer, err := db.TxContext()
 	if err != nil {
@@ -147,11 +205,7 @@ func doArchive(r *ArchiveRequest) (*repo_model.RepoArchiver, error) {
 		}
 	}
 
-	rPath, err := archiver.RelativePath()
-	if err != nil {
-		return nil, err
-	}
-
+	rPath := archiver.RelativePath()
 	_, err = storage.RepoArchives.Stat(rPath)
 	if err == nil {
 		if archiver.Status == repo_model.ArchiverGenerating {
@@ -284,13 +338,10 @@ func StartArchive(request *ArchiveRequest) error {
 }
 
 func deleteOldRepoArchiver(ctx context.Context, archiver *repo_model.RepoArchiver) error {
-	p, err := archiver.RelativePath()
-	if err != nil {
-		return err
-	}
 	if err := repo_model.DeleteRepoArchiver(ctx, archiver); err != nil {
 		return err
 	}
+	p := archiver.RelativePath()
 	if err := storage.RepoArchives.Delete(p); err != nil {
 		log.Error("delete repo archive file failed: %v", err)
 	}

Original file line number	Diff line number	Diff line change
`@@ -218,8 +218,7 @@ func DeleteRepository(doer *user_model.User, uid, repoID int64) error {`
`218`	`218`
`219`	`219`	`archivePaths := make([]string, 0, len(archives))`
`220`	`220`	`for _, v := range archives {`
`221`		`- p, _ := v.RelativePath()`
`222`		`- archivePaths = append(archivePaths, p)`
	`221`	`+ archivePaths = append(archivePaths, v.RelativePath())`
`223`	`222`	`}`
`224`	`223`
`225`	`224`	`if _, err := db.DeleteByBean(ctx, &repo_model.RepoArchiver{RepoID: repoID}); err != nil {`
Original file line number	Diff line number	Diff line change
`@@ -39,9 +39,9 @@ func init() {`
`39`	`39`	`db.RegisterModel(new(RepoArchiver))`
`40`	`40`	`}`
`41`	`41`
`42`		`-// RelativePath returns relative path`
`43`		`-func (archiver *RepoArchiver) RelativePath() (string, error) {`
`44`		`- return fmt.Sprintf("%d/%s/%s.%s", archiver.RepoID, archiver.CommitID[:2], archiver.CommitID, archiver.Type.String()), nil`
	`42`	`+// RelativePath returns the archive path relative to the archive storage root.`
	`43`	`+func (archiver *RepoArchiver) RelativePath() string {`
	`44`	`+ return fmt.Sprintf("%d/%s/%s.%s", archiver.RepoID, archiver.CommitID[:2], archiver.CommitID, archiver.Type.String())`
`45`	`45`	`}`
`46`	`46`
`47`	`47`	`var delRepoArchiver = new(RepoArchiver)`