Rework raw file http header logic (#20484) (#20542)

- Always respect the user's configured mime type map
- Allow more types like image/pdf/video/audio to serve with correct content-type
- Shorten cache duration of raw files to 5 minutes, matching GitHub
- Don't set `content-disposition: attachment`, let the browser decide whether it wants to download or display a file directly
- Implement RFC 5987 encoding for filenames and remove the previous hack. Confirmed working in Safari.
- Make PDF attachment work in Safari by removing `sandbox` attribute.

This change will make many more file types open directly in the browser. The logic should also be more readable than before, with fewer levels of `if` nesting.

Replaces: https://github.com/go-gitea/gitea/pull/20460
Replaces: https://github.com/go-gitea/gitea/pull/20455
Fixes: https://github.com/go-gitea/gitea/issues/20404
This commit is contained in:
silverwind 2022-07-30 18:37:02 +02:00 committed by GitHub
parent 97a8c96c5b
commit eeb490c7ab
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 67 additions and 39 deletions

View file

@ -70,6 +70,16 @@ func (ct SniffedType) IsRepresentableAsText() bool {
return ct.IsText() || ct.IsSvgImage()
}
// IsBrowsableBinaryType reports whether ct is an image, SVG image, PDF, video
// or audio type, i.e. one of the non-text types treated as displayable
// directly in a browser.
func (ct SniffedType) IsBrowsableBinaryType() bool {
	// The case expressions are evaluated left to right and stop at the first
	// one that is true.
	switch {
	case ct.IsImage(), ct.IsSvgImage(), ct.IsPDF(), ct.IsVideo(), ct.IsAudio():
		return true
	default:
		return false
	}
}
// GetMimeType returns the part of the sniffed content type that precedes the
// first ";", dropping any parameters that follow it (for example a charset).
// When the content type contains no ";" it is returned unchanged.
func (ct SniffedType) GetMimeType() string {
	full := ct.contentType
	if semi := strings.IndexByte(full, ';'); semi >= 0 {
		return full[:semi]
	}
	return full
}
// DetectContentType extends http.DetectContentType with more content types. Defaults to text/unknown if input is empty.
func DetectContentType(data []byte) SniffedType {
if len(data) == 0 {

View file

@ -7,12 +7,13 @@ package common
import (
"fmt"
"io"
"net/url"
"path"
"path/filepath"
"strings"
"time"
"code.gitea.io/gitea/modules/charset"
charsetModule "code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/httpcache"
@ -42,7 +43,7 @@ func ServeBlob(ctx *context.Context, blob *git.Blob, lastModified time.Time) err
}
// ServeData download file from io.Reader
func ServeData(ctx *context.Context, name string, size int64, reader io.Reader) error {
func ServeData(ctx *context.Context, filePath string, size int64, reader io.Reader) error {
buf := make([]byte, 1024)
n, err := util.ReadAtMost(reader, buf)
if err != nil {
@ -52,56 +53,73 @@ func ServeData(ctx *context.Context, name string, size int64, reader io.Reader)
buf = buf[:n]
}
ctx.Resp.Header().Set("Cache-Control", "public,max-age=86400")
httpcache.AddCacheControlToHeader(ctx.Resp.Header(), 5*time.Minute)
if size >= 0 {
ctx.Resp.Header().Set("Content-Length", fmt.Sprintf("%d", size))
} else {
log.Error("ServeData called to serve data: %s with size < 0: %d", name, size)
log.Error("ServeData called to serve data: %s with size < 0: %d", filePath, size)
}
name = path.Base(name)
// Google Chrome dislike commas in filenames, so let's change it to a space
name = strings.ReplaceAll(name, ",", " ")
fileName := path.Base(filePath)
sniffedType := typesniffer.DetectContentType(buf)
isPlain := sniffedType.IsText() || ctx.FormBool("render")
mimeType := ""
charset := ""
st := typesniffer.DetectContentType(buf)
mappedMimeType := ""
if setting.MimeTypeMap.Enabled {
fileExtension := strings.ToLower(filepath.Ext(name))
mappedMimeType = setting.MimeTypeMap.Map[fileExtension]
fileExtension := strings.ToLower(filepath.Ext(fileName))
mimeType = setting.MimeTypeMap.Map[fileExtension]
}
if st.IsText() || ctx.FormBool("render") {
cs, err := charset.DetectEncoding(buf)
if err != nil {
log.Error("Detect raw file %s charset failed: %v, using by default utf-8", name, err)
cs = "utf-8"
}
if mappedMimeType == "" {
mappedMimeType = "text/plain"
}
ctx.Resp.Header().Set("Content-Type", mappedMimeType+"; charset="+strings.ToLower(cs))
} else {
ctx.Resp.Header().Set("Access-Control-Expose-Headers", "Content-Disposition")
if mappedMimeType != "" {
ctx.Resp.Header().Set("Content-Type", mappedMimeType)
}
if (st.IsImage() || st.IsPDF()) && (setting.UI.SVG.Enabled || !st.IsSvgImage()) {
ctx.Resp.Header().Set("Content-Disposition", fmt.Sprintf(`inline; filename="%s"`, name))
if st.IsSvgImage() || st.IsPDF() {
ctx.Resp.Header().Set("Content-Security-Policy", "default-src 'none'; style-src 'unsafe-inline'; sandbox")
ctx.Resp.Header().Set("X-Content-Type-Options", "nosniff")
if st.IsSvgImage() {
ctx.Resp.Header().Set("Content-Type", typesniffer.SvgMimeType)
} else {
ctx.Resp.Header().Set("Content-Type", typesniffer.ApplicationOctetStream)
}
}
if mimeType == "" {
if sniffedType.IsBrowsableBinaryType() {
mimeType = sniffedType.GetMimeType()
} else if isPlain {
mimeType = "text/plain"
} else {
ctx.Resp.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, name))
mimeType = typesniffer.ApplicationOctetStream
}
}
if isPlain {
charset, err = charsetModule.DetectEncoding(buf)
if err != nil {
log.Error("Detect raw file %s charset failed: %v, using by default utf-8", filePath, err)
charset = "utf-8"
}
}
if charset != "" {
ctx.Resp.Header().Set("Content-Type", mimeType+"; charset="+strings.ToLower(charset))
} else {
ctx.Resp.Header().Set("Content-Type", mimeType)
}
ctx.Resp.Header().Set("X-Content-Type-Options", "nosniff")
isSVG := sniffedType.IsSvgImage()
// serve types that can present a security risk with CSP
if isSVG {
ctx.Resp.Header().Set("Content-Security-Policy", "default-src 'none'; style-src 'unsafe-inline'; sandbox")
} else if sniffedType.IsPDF() {
// no sandbox attribute for pdf as it breaks rendering in at least safari. this
// should generally be safe as scripts inside PDF can not escape the PDF document
// see https://bugs.chromium.org/p/chromium/issues/detail?id=413851 for more discussion
ctx.Resp.Header().Set("Content-Security-Policy", "default-src 'none'; style-src 'unsafe-inline'")
}
disposition := "inline"
if isSVG && !setting.UI.SVG.Enabled {
disposition = "attachment"
}
// encode filename per https://datatracker.ietf.org/doc/html/rfc5987
encodedFileName := `filename*=UTF-8''` + url.PathEscape(fileName)
ctx.Resp.Header().Set("Content-Disposition", disposition+"; "+encodedFileName)
ctx.Resp.Header().Set("Access-Control-Expose-Headers", "Content-Disposition")
_, err = ctx.Resp.Write(buf)
if err != nil {
return err