Detect ogg mime-type as audio or video (#26494)

"ogg" is just a "container" format for audio and video.

Golang's `DetectContentType` only reports "application/ogg" for
potential ogg files.

Actually it could do more "guess" to see whether it is a audio file or a
video file.
This commit is contained in:
wxiaoguang 2023-08-15 10:31:25 +08:00 committed by GitHub
parent c91a7e8dbb
commit ced34bab1a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 25 additions and 1 deletions

View file

@ -71,7 +71,7 @@ func (ct SniffedType) IsRepresentableAsText() bool {
return ct.IsText() || ct.IsSvgImage() return ct.IsText() || ct.IsSvgImage()
} }
// IsBrowsableType returns whether a non-text type can be displayed in a browser // IsBrowsableBinaryType returns whether a non-text type can be displayed in a browser
func (ct SniffedType) IsBrowsableBinaryType() bool { func (ct SniffedType) IsBrowsableBinaryType() bool {
return ct.IsImage() || ct.IsSvgImage() || ct.IsPDF() || ct.IsVideo() || ct.IsAudio() return ct.IsImage() || ct.IsSvgImage() || ct.IsPDF() || ct.IsVideo() || ct.IsAudio()
} }
@ -116,6 +116,17 @@ func DetectContentType(data []byte) SniffedType {
} }
} }
if ct == "application/ogg" {
dataHead := data
if len(dataHead) > 256 {
dataHead = dataHead[:256] // only need to do a quick check for the file header
}
if bytes.Contains(dataHead, []byte("theora")) || bytes.Contains(dataHead, []byte("dirac")) {
ct = "video/ogg" // ogg is only used for some video formats, and it's not popular
} else {
ct = "audio/ogg" // for most cases, it is used as an audio container
}
}
return SniffedType{ct} return SniffedType{ct}
} }

View file

@ -6,6 +6,7 @@ package typesniffer
import ( import (
"bytes" "bytes"
"encoding/base64" "encoding/base64"
"encoding/hex"
"strings" "strings"
"testing" "testing"
@ -121,3 +122,15 @@ func TestDetectContentTypeFromReader(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
assert.True(t, st.IsAudio()) assert.True(t, st.IsAudio())
} }
func TestDetectContentTypeOgg(t *testing.T) {
oggAudio, _ := hex.DecodeString("4f67675300020000000000000000352f0000000000007dc39163011e01766f72626973000000000244ac0000000000000071020000000000b8014f6767530000")
st, err := DetectContentTypeFromReader(bytes.NewReader(oggAudio))
assert.NoError(t, err)
assert.True(t, st.IsAudio())
oggVideo, _ := hex.DecodeString("4f676753000200000000000000007d9747ef000000009b59daf3012a807468656f7261030201001e00110001e000010e00020000001e00000001000001000001")
st, err = DetectContentTypeFromReader(bytes.NewReader(oggVideo))
assert.NoError(t, err)
assert.True(t, st.IsVideo())
}