Fix mime-type detection for HTTP server (#18370)

Bypass the unstable behavior of Golang's mime.TypeByExtension
This commit is contained in:
wxiaoguang 2022-01-23 20:19:49 +08:00 committed by GitHub
parent 35fdefc1ff
commit 87141b908d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 61 additions and 20 deletions

View file

@ -0,0 +1,41 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package public
import "strings"
// wellKnownMimeTypesLower maps a lower-case file extension (including the leading dot)
// to the Content-Type value used for that extension.
// The entries are taken from `builtinTypesLower` in Golang's builtin mime package, with a
// few additions for types missing there; see the comment of detectWellKnownMimeType for
// the reason this table exists.
var wellKnownMimeTypesLower = map[string]string{
	// text-like types, all declared with an explicit utf-8 charset
	".css":  "text/css; charset=utf-8",
	".htm":  "text/html; charset=utf-8",
	".html": "text/html; charset=utf-8",
	".js":   "text/javascript; charset=utf-8",
	".mjs":  "text/javascript; charset=utf-8",
	".xml":  "text/xml; charset=utf-8",

	// image types
	".avif": "image/avif",
	".gif":  "image/gif",
	".jpeg": "image/jpeg",
	".jpg":  "image/jpeg",
	".png":  "image/png",
	".svg":  "image/svg+xml",
	".webp": "image/webp",

	// application types
	".json": "application/json",
	".pdf":  "application/pdf",
	".wasm": "application/wasm",

	// types that are missing from the builtin list
	".txt": "text/plain; charset=utf-8",
}
// detectWellKnownMimeType returns the mime-type registered for a well-known file extension.
// ext is lower-cased before it is looked up in wellKnownMimeTypesLower, so the match is
// case-insensitive; an empty string is returned when the extension is not in the table.
//
// This function exists to bypass the unstable behavior of Golang's mime.TypeByExtension:
// mime.TypeByExtension lets the OS's mime-type config overwrite the well-known types (see its document).
// If the user's OS has an incorrect mime-type config, Gitea could not respond with a correct
// Content-Type to browsers. For example, if Gitea returned `text/plain` for a `.js` file,
// the browser would refuse to run the JS for security reasons.
// Looking the type up here keeps the Content-Type of well-known files stable.
func detectWellKnownMimeType(ext string) string {
	mimeType, found := wellKnownMimeTypesLower[strings.ToLower(ext)]
	if !found {
		// unknown extension: callers treat the empty string as "not well-known"
		return ""
	}
	return mimeType
}

View file

@ -92,6 +92,15 @@ func parseAcceptEncoding(val string) map[string]bool {
return types return types
} }
// setWellKnownContentType sets the Content-Type response header from the extension of file,
// but only when detectWellKnownMimeType recognizes that extension.
// For any other extension the header is left untouched.
// See the comments of detectWellKnownMimeType for why the well-known table is used.
func setWellKnownContentType(w http.ResponseWriter, file string) {
	contentType := detectWellKnownMimeType(filepath.Ext(file))
	if contentType == "" {
		// not a well-known type: do not set the header here
		return
	}
	w.Header().Set("Content-Type", contentType)
}
func (opts *Options) handle(w http.ResponseWriter, req *http.Request, fs http.FileSystem, file string) bool { func (opts *Options) handle(w http.ResponseWriter, req *http.Request, fs http.FileSystem, file string) bool {
// use clean to keep the file is a valid path with no . or .. // use clean to keep the file is a valid path with no . or ..
f, err := fs.Open(path.Clean(file)) f, err := fs.Open(path.Clean(file))
@ -122,6 +131,8 @@ func (opts *Options) handle(w http.ResponseWriter, req *http.Request, fs http.Fi
return true return true
} }
setWellKnownContentType(w, file)
serveContent(w, req, fi, fi.ModTime(), f) serveContent(w, req, fi, fi.ModTime(), f)
return true return true
} }

View file

@ -9,15 +9,12 @@ package public
import ( import (
"bytes" "bytes"
"compress/gzip"
"io" "io"
"mime"
"net/http" "net/http"
"os" "os"
"path/filepath" "path/filepath"
"time" "time"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/timeutil"
) )
@ -66,24 +63,16 @@ func serveContent(w http.ResponseWriter, req *http.Request, fi os.FileInfo, modt
encodings := parseAcceptEncoding(req.Header.Get("Accept-Encoding")) encodings := parseAcceptEncoding(req.Header.Get("Accept-Encoding"))
if encodings["gzip"] { if encodings["gzip"] {
if cf, ok := fi.(*vfsgen۰CompressedFileInfo); ok { if cf, ok := fi.(*vfsgen۰CompressedFileInfo); ok {
rd := bytes.NewReader(cf.GzipBytes()) rdGzip := bytes.NewReader(cf.GzipBytes())
w.Header().Set("Content-Encoding", "gzip") // all static files are managed by Gitea, so we can make sure every file has the correct ext name
ctype := mime.TypeByExtension(filepath.Ext(fi.Name())) // then we can get the correct Content-Type, we do not need to do http.DetectContentType on the decompressed data
if ctype == "" { mimeType := detectWellKnownMimeType(filepath.Ext(fi.Name()))
// read a chunk to decide between utf-8 text and binary if mimeType == "" {
var buf [512]byte mimeType = "application/octet-stream"
grd, _ := gzip.NewReader(rd)
n, _ := io.ReadFull(grd, buf[:])
ctype = http.DetectContentType(buf[:n])
_, err := rd.Seek(0, io.SeekStart) // rewind to output whole file
if err != nil {
log.Error("rd.Seek error: %v", err)
http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
return
}
} }
w.Header().Set("Content-Type", ctype) w.Header().Set("Content-Type", mimeType)
http.ServeContent(w, req, fi.Name(), modtime, rd) w.Header().Set("Content-Encoding", "gzip")
http.ServeContent(w, req, fi.Name(), modtime, rdGzip)
return return
} }
} }