diff --git a/integrations/download_test.go b/integrations/download_test.go index f46122d95108..9594febc11e7 100644 --- a/integrations/download_test.go +++ b/integrations/download_test.go @@ -5,6 +5,7 @@ package integrations import ( + "mime" "net/http" "testing" @@ -91,3 +92,32 @@ func TestDownloadRawTextFileWithMimeTypeMapping(t *testing.T) { delete(setting.MimeTypeMap.Map, ".xml") setting.MimeTypeMap.Enabled = false } + +func TestDownloadRawBinaryFileWithoutMimeTypeMapping(t *testing.T) { + defer prepareTestEnv(t)() + + session := loginUser(t, "user2") + + req := NewRequest(t, "GET", "/user2/repo2/raw/branch/master/bin.foo") + resp := session.MakeRequest(t, req, http.StatusOK) + + assert.Equal(t, "application/octet-stream", resp.HeaderMap.Get("Content-Type")) +} + +func TestDownloadRawBinaryFileWithMimeTypeMapping(t *testing.T) { + defer prepareTestEnv(t)() + + setting.MimeTypeMap.Map[".foo"] = "audio/foo" + setting.MimeTypeMap.Enabled = true + _ = mime.AddExtensionType(".foo", "audio/foo") + + session := loginUser(t, "user2") + + req := NewRequest(t, "GET", "/user2/repo2/raw/branch/master/bin.foo") + resp := session.MakeRequest(t, req, http.StatusOK) + + assert.Equal(t, "audio/foo", resp.HeaderMap.Get("Content-Type")) + + delete(setting.MimeTypeMap.Map, ".foo") + setting.MimeTypeMap.Enabled = false +} diff --git a/integrations/gitea-repositories-meta/user2/repo2.git/objects/25/2b2353ba950ba3c0457c1acf214937e168db68 b/integrations/gitea-repositories-meta/user2/repo2.git/objects/25/2b2353ba950ba3c0457c1acf214937e168db68 new file mode 100644 index 000000000000..ca83a70756f8 Binary files /dev/null and b/integrations/gitea-repositories-meta/user2/repo2.git/objects/25/2b2353ba950ba3c0457c1acf214937e168db68 differ diff --git a/integrations/gitea-repositories-meta/user2/repo2.git/objects/80/8eedf6b8dd519aa89b59af2d815ed668580fc2 b/integrations/gitea-repositories-meta/user2/repo2.git/objects/80/8eedf6b8dd519aa89b59af2d815ed668580fc2 new file mode 100644 index 000000000000..75fc9a707472 Binary files /dev/null and b/integrations/gitea-repositories-meta/user2/repo2.git/objects/80/8eedf6b8dd519aa89b59af2d815ed668580fc2 differ diff --git a/integrations/gitea-repositories-meta/user2/repo2.git/objects/ee/c3fee8b8e28307e5c8c9099fac5eb583f797b4 b/integrations/gitea-repositories-meta/user2/repo2.git/objects/ee/c3fee8b8e28307e5c8c9099fac5eb583f797b4 new file mode 100644 index 000000000000..a0d2c16297a6 Binary files /dev/null and b/integrations/gitea-repositories-meta/user2/repo2.git/objects/ee/c3fee8b8e28307e5c8c9099fac5eb583f797b4 differ diff --git a/integrations/gitea-repositories-meta/user2/repo2.git/refs/heads/master b/integrations/gitea-repositories-meta/user2/repo2.git/refs/heads/master index 334d09ca0215..c0214d49eef4 100644 --- a/integrations/gitea-repositories-meta/user2/repo2.git/refs/heads/master +++ b/integrations/gitea-repositories-meta/user2/repo2.git/refs/heads/master @@ -1 +1 @@ -1032bbf17fbc0d9c95bb5418dabe8f8c99278700 +808eedf6b8dd519aa89b59af2d815ed668580fc2 diff --git a/modules/charset/charset.go b/modules/charset/charset.go index a1210d2f05dc..3bef6de64e68 100644 --- a/modules/charset/charset.go +++ b/modules/charset/charset.go @@ -129,6 +129,18 @@ func RemoveBOMIfPresent(content []byte) []byte { return content } +// DetectEncodingFromReader +// Read the head 1024 bytes from the reader and detect it's encoding +// Note: you may need reader.Seek(0, io.SeekStart) to reset the offset +func DetectEncodingFromReader(reader io.Reader) (string, error) { + buf := make([]byte, 1024) + n, err := util.ReadAtMost(reader, buf) + if err != nil { + return "", fmt.Errorf("DetectEncoding io error: %w", err) + } + return DetectEncoding(buf[:n]) +} + // DetectEncoding detect the encoding of content func DetectEncoding(content []byte) (string, error) { // First we check if the content represents valid utf8 content excepting a truncated character at the end. diff --git a/modules/lfs/content_store.go b/modules/lfs/content_store.go index c794a1feccc6..e8336ecb0726 100644 --- a/modules/lfs/content_store.go +++ b/modules/lfs/content_store.go @@ -114,7 +114,7 @@ func (s *ContentStore) Verify(pointer Pointer) (bool, error) { } // ReadMetaObject will read a git_model.LFSMetaObject and return a reader -func ReadMetaObject(pointer Pointer) (io.ReadCloser, error) { +func ReadMetaObject(pointer Pointer) (io.ReadSeekCloser, error) { contentStore := NewContentStore() return contentStore.Get(pointer) } diff --git a/modules/setting/mime_type_map.go b/modules/setting/mime_type_map.go index 8e5b864e2413..add2a3acc240 100644 --- a/modules/setting/mime_type_map.go +++ b/modules/setting/mime_type_map.go @@ -4,7 +4,12 @@ package setting -import "strings" +import ( + "mime" + "strings" + + "code.gitea.io/gitea/modules/log" +) // MimeTypeMap defines custom mime type mapping settings var MimeTypeMap = struct { @@ -21,6 +26,10 @@ func newMimeTypeMap() { m := make(map[string]string, len(keys)) for _, key := range keys { m[strings.ToLower(key.Name())] = key.Value() + err := mime.AddExtensionType(key.Name(), key.Value()) + if err != nil { + log.Warn("mime.AddExtensionType(%s,%s): %v", key.Name(), key.Value(), err) + } } MimeTypeMap.Map = m if len(keys) > 0 { diff --git a/modules/typesniffer/typesniffer.go b/modules/typesniffer/typesniffer.go index b6a6646d50ce..fd0e45d858fa 100644 --- a/modules/typesniffer/typesniffer.go +++ b/modules/typesniffer/typesniffer.go @@ -7,7 +7,9 @@ package typesniffer import ( "fmt" "io" + "mime" "net/http" + "path/filepath" "regexp" "strings" @@ -36,32 +38,32 @@ type SniffedType struct { // IsText etects if content format is plain text. func (ct SniffedType) IsText() bool { - return strings.Contains(ct.contentType, "text/") + return strings.HasPrefix(ct.contentType, "text/") } // IsImage detects if data is an image format func (ct SniffedType) IsImage() bool { - return strings.Contains(ct.contentType, "image/") + return strings.HasPrefix(ct.contentType, "image/") } // IsSvgImage detects if data is an SVG image format func (ct SniffedType) IsSvgImage() bool { - return strings.Contains(ct.contentType, SvgMimeType) + return strings.HasPrefix(ct.contentType, SvgMimeType) } // IsPDF detects if data is a PDF format func (ct SniffedType) IsPDF() bool { - return strings.Contains(ct.contentType, "application/pdf") + return strings.HasPrefix(ct.contentType, "application/pdf") } // IsVideo detects if data is an video format func (ct SniffedType) IsVideo() bool { - return strings.Contains(ct.contentType, "video/") + return strings.HasPrefix(ct.contentType, "video/") } // IsAudio detects if data is an video format func (ct SniffedType) IsAudio() bool { - return strings.Contains(ct.contentType, "audio/") + return strings.HasPrefix(ct.contentType, "audio/") } // IsRepresentableAsText returns true if file content can be represented as @@ -70,6 +72,11 @@ func (ct SniffedType) IsRepresentableAsText() bool { return ct.IsText() || ct.IsSvgImage() } +// Mime return the mime +func (ct SniffedType) Mime() string { + return strings.Split(ct.contentType, ";")[0] +} + // DetectContentType extends http.DetectContentType with more content types. Defaults to text/unknown if input is empty. func DetectContentType(data []byte) SniffedType { if len(data) == 0 { @@ -82,8 +89,8 @@ func DetectContentType(data []byte) SniffedType { data = data[:sniffLen] } - if (strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")) && svgTagRegex.Match(data) || - strings.Contains(ct, "text/xml") && svgTagInXMLRegex.Match(data) { + if (strings.HasPrefix(ct, "text/plain") || strings.HasPrefix(ct, "text/html")) && svgTagRegex.Match(data) || + strings.HasPrefix(ct, "text/xml") && svgTagInXMLRegex.Match(data) { // SVG is unsupported. https://github.com/golang/go/issues/15888 ct = SvgMimeType } @@ -91,6 +98,26 @@ func DetectContentType(data []byte) SniffedType { return SniffedType{ct} } +// DetectContentTypeExtFirst +// detect content type by `name` first, if not found, detect by `reader` +// Note: you may need `reader.Seek(0, io.SeekStart)` to reset the offset +func DetectContentTypeExtFirst(name string, bytesOrReader interface{}) (SniffedType, error) { + ct := mime.TypeByExtension(filepath.Ext(name)) + // FIXME: Not sure if it's necessary to keep the old behavior. + // if ct != "" && !strings.HasPrefix(ct, "text/") { + if ct != "" { + return SniffedType{ct}, nil + } + if r, ok := bytesOrReader.(io.Reader); ok { + st, err := DetectContentTypeFromReader(r) + if nil != err { + return SniffedType{}, err + } + return st, nil + } + return DetectContentType(bytesOrReader.([]byte)), nil +} + // DetectContentTypeFromReader guesses the content type contained in the reader. func DetectContentTypeFromReader(r io.Reader) (SniffedType, error) { buf := make([]byte, sniffLen) diff --git a/routers/api/v1/repo/file.go b/routers/api/v1/repo/file.go index ba8a938b8308..5ddc66b54781 100644 --- a/routers/api/v1/repo/file.go +++ b/routers/api/v1/repo/file.go @@ -173,7 +173,7 @@ func GetRawFileOrLFS(ctx *context.APIContext) { } // OK not cached - serve! - if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, blob.Size(), bytes.NewReader(buf)); err != nil { + if err := common.ServeLargeFile(ctx.Context, ctx.Repo.TreePath, lastModified, bytes.NewReader(buf)); err != nil { ctx.ServerError("ServeBlob", err) } return @@ -189,7 +189,7 @@ func GetRawFileOrLFS(ctx *context.APIContext) { return } - if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, blob.Size(), bytes.NewReader(buf)); err != nil { + if err := common.ServeLargeFile(ctx.Context, ctx.Repo.TreePath, lastModified, bytes.NewReader(buf)); err != nil { ctx.ServerError("ServeBlob", err) } return @@ -219,7 +219,7 @@ func GetRawFileOrLFS(ctx *context.APIContext) { } defer lfsDataRc.Close() - if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, meta.Size, lfsDataRc); err != nil { + if err := common.ServeLargeFile(ctx.Context, ctx.Repo.TreePath, lastModified, lfsDataRc); err != nil { ctx.ServerError("ServeData", err) } } diff --git a/routers/common/repo.go b/routers/common/repo.go index b3cd749115fb..5c656383c196 100644 --- a/routers/common/repo.go +++ b/routers/common/repo.go @@ -7,8 +7,9 @@ package common import ( "fmt" "io" - "path" + "net/http" "path/filepath" + "strconv" "strings" "time" @@ -22,7 +23,8 @@ import ( "code.gitea.io/gitea/modules/util" ) -// ServeBlob download a git.Blob +// ServeBlob serve git.Blob which represents a normal(non-lfs) file stored in repositories +// todo: implement io.Seeker for git.Blob.blobReader to support Range-Request func ServeBlob(ctx *context.Context, blob *git.Blob, lastModified time.Time) error { if httpcache.HandleGenericETagTimeCache(ctx.Req, ctx.Resp, `"`+blob.ID.String()+`"`, lastModified) { return nil @@ -38,13 +40,8 @@ func ServeBlob(ctx *context.Context, blob *git.Blob, lastModified time.Time) err } }() - return ServeData(ctx, ctx.Repo.TreePath, blob.Size(), dataRc) -} - -// ServeData download file from io.Reader -func ServeData(ctx *context.Context, name string, size int64, reader io.Reader) error { buf := make([]byte, 1024) - n, err := util.ReadAtMost(reader, buf) + n, err := util.ReadAtMost(dataRc, buf) if err != nil { return err } @@ -52,35 +49,66 @@ func ServeData(ctx *context.Context, name string, size int64, reader io.Reader) buf = buf[:n] } - ctx.Resp.Header().Set("Cache-Control", "public,max-age=86400") - + size := blob.Size() if size >= 0 { - ctx.Resp.Header().Set("Content-Length", fmt.Sprintf("%d", size)) + ctx.Resp.Header().Set("Content-Length", strconv.FormatInt(size, 10)) } else { - log.Error("ServeData called to serve data: %s with size < 0: %d", name, size) + log.Error("ServeData called to serve data: %s with size < 0: %d", ctx.Repo.TreePath, size) + } + + if err := setCommonHeaders(ctx, ctx.Repo.TreePath, buf); err != nil { + return err + } + + _, err = ctx.Resp.Write(buf) + if err != nil { + return err } - name = path.Base(name) + _, err = io.Copy(ctx.Resp, dataRc) + return err +} +func setCommonHeaders(ctx *context.Context, name string, data interface{}) error { // Google Chrome dislike commas in filenames, so let's change it to a space name = strings.ReplaceAll(name, ",", " ") - st := typesniffer.DetectContentType(buf) + ctx.Resp.Header().Set("Cache-Control", "public, max-age=300") + + // reset the offset to the start of served file + if seeker, ok := data.(io.ReadSeeker); ok { + _, _ = seeker.Seek(0, io.SeekStart) + } + + st, err := typesniffer.DetectContentTypeExtFirst(name, data) + if nil != err { + return err + } mappedMimeType := "" if setting.MimeTypeMap.Enabled { fileExtension := strings.ToLower(filepath.Ext(name)) mappedMimeType = setting.MimeTypeMap.Map[fileExtension] } + if st.IsText() || ctx.FormBool("render") { - cs, err := charset.DetectEncoding(buf) + var cs string + var err error + if reader, ok := data.(io.ReadSeeker); ok { + cs, err = charset.DetectEncodingFromReader(reader) + _, _ = reader.Seek(0, io.SeekStart) + } else { + cs, err = charset.DetectEncoding(data.([]byte)) + } if err != nil { log.Error("Detect raw file %s charset failed: %v, using by default utf-8", name, err) cs = "utf-8" } + if mappedMimeType == "" { mappedMimeType = "text/plain" } ctx.Resp.Header().Set("Content-Type", mappedMimeType+"; charset="+strings.ToLower(cs)) + } else { ctx.Resp.Header().Set("Access-Control-Expose-Headers", "Content-Disposition") if mappedMimeType != "" { @@ -102,10 +130,14 @@ func ServeData(ctx *context.Context, name string, size int64, reader io.Reader) } } - _, err = ctx.Resp.Write(buf) - if err != nil { + return nil +} + +// ServeLargeFile Serve files stored with Git LFS and attachments uploaded on the Releases page +func ServeLargeFile(ctx *context.Context, name string, time time.Time, reader io.ReadSeeker) error { + if err := setCommonHeaders(ctx, name, reader); err != nil { return err } - _, err = io.Copy(ctx.Resp, reader) - return err + http.ServeContent(ctx.Resp, ctx.Req, name, time, reader) + return nil } diff --git a/routers/web/repo/attachment.go b/routers/web/repo/attachment.go index 190dc6c2c751..a276f1f7b7c2 100644 --- a/routers/web/repo/attachment.go +++ b/routers/web/repo/attachment.go @@ -145,7 +145,7 @@ func GetAttachment(ctx *context.Context) { } defer fr.Close() - if err = common.ServeData(ctx, attach.Name, attach.Size, fr); err != nil { + if err = common.ServeLargeFile(ctx, attach.Name, attach.CreatedUnix.AsTime(), fr); err != nil { ctx.ServerError("ServeData", err) return } diff --git a/routers/web/repo/download.go b/routers/web/repo/download.go index 6755cda87486..c92a2eddd14c 100644 --- a/routers/web/repo/download.go +++ b/routers/web/repo/download.go @@ -73,7 +73,7 @@ func ServeBlobOrLFS(ctx *context.Context, blob *git.Blob, lastModified time.Time log.Error("ServeBlobOrLFS: Close: %v", err) } }() - return common.ServeData(ctx, ctx.Repo.TreePath, meta.Size, lfsDataRc) + return common.ServeLargeFile(ctx, ctx.Repo.TreePath, meta.CreatedUnix.AsTime(), lfsDataRc) } if err = dataRc.Close(); err != nil { log.Error("ServeBlobOrLFS: Close: %v", err)