[FEAT] Enable ambiguous character detection in configured contexts
- The ambiguous character detection is an important security feature to combat against sourcebase attacks (https://trojansource.codes/). - However there are a few problems with the feature as it stands today (i) it's apparantly an big performance hitter, it's twice as slow as syntax highlighting (ii) it contains false positives, because it's reporting valid problems but not valid within the context of a programming language (ambiguous charachters in code comments being a prime example) that can lead to security issues (iii) charachters from certain languages always being marked as ambiguous. It's a lot of effort to fix the aforementioned issues. - Therefore, make it configurable in which context the ambiguous character detection should be run, this avoids running detection in all contexts such as file views, but still enable it in commits and pull requests diffs where it matters the most. Ideally this also becomes an per-repository setting, but the code architecture doesn't allow for a clean implementation of that. - Adds unit test. - Adds integration tests to ensure that the contexts and instance-wide is respected (and that ambigious charachter detection actually work in different places). - Ref: https://codeberg.org/forgejo/forgejo/pulls/2395#issuecomment-1575547 - Ref: https://codeberg.org/forgejo/forgejo/issues/564
This commit is contained in:
parent
0081e59243
commit
5b3a82d621
9 changed files with 151 additions and 15 deletions
|
@ -10,6 +10,7 @@ package charset
|
||||||
import (
|
import (
|
||||||
"html/template"
|
"html/template"
|
||||||
"io"
|
"io"
|
||||||
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"code.gitea.io/gitea/modules/log"
|
"code.gitea.io/gitea/modules/log"
|
||||||
|
@ -20,16 +21,29 @@ import (
|
||||||
// RuneNBSP is the codepoint for NBSP
|
// RuneNBSP is the codepoint for NBSP
|
||||||
const RuneNBSP = 0xa0
|
const RuneNBSP = 0xa0
|
||||||
|
|
||||||
|
type escapeContext string
|
||||||
|
|
||||||
|
// Keep this consistent with the documentation of [ui].SKIP_ESCAPE_CONTEXTS
|
||||||
|
// Defines the different contexts that could be used to escape in.
|
||||||
|
const (
|
||||||
|
// Wiki pages.
|
||||||
|
WikiContext escapeContext = "wiki"
|
||||||
|
// Rendered content (except markup), source code and blames.
|
||||||
|
FileviewContext escapeContext = "file-view"
|
||||||
|
// Commits or pull requet's diff.
|
||||||
|
DiffContext escapeContext = "diff"
|
||||||
|
)
|
||||||
|
|
||||||
// EscapeControlHTML escapes the unicode control sequences in a provided html document
|
// EscapeControlHTML escapes the unicode control sequences in a provided html document
|
||||||
func EscapeControlHTML(html template.HTML, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output template.HTML) {
|
func EscapeControlHTML(html template.HTML, locale translation.Locale, context escapeContext, allowed ...rune) (escaped *EscapeStatus, output template.HTML) {
|
||||||
sb := &strings.Builder{}
|
sb := &strings.Builder{}
|
||||||
escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, allowed...) // err has been handled in EscapeControlReader
|
escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, context, allowed...) // err has been handled in EscapeControlReader
|
||||||
return escaped, template.HTML(sb.String())
|
return escaped, template.HTML(sb.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
// EscapeControlReader escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus
|
// EscapeControlReader escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus
|
||||||
func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) {
|
func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, context escapeContext, allowed ...rune) (escaped *EscapeStatus, err error) {
|
||||||
if !setting.UI.AmbiguousUnicodeDetection {
|
if !setting.UI.AmbiguousUnicodeDetection || slices.Contains(setting.UI.SkipEscapeContexts, string(context)) {
|
||||||
_, err = io.Copy(writer, reader)
|
_, err = io.Copy(writer, reader)
|
||||||
return &EscapeStatus{}, err
|
return &EscapeStatus{}, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
package charset
|
package charset
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"html/template"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
@ -14,6 +15,8 @@ import (
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var testContext = escapeContext("test")
|
||||||
|
|
||||||
type escapeControlTest struct {
|
type escapeControlTest struct {
|
||||||
name string
|
name string
|
||||||
text string
|
text string
|
||||||
|
@ -159,7 +162,7 @@ func TestEscapeControlReader(t *testing.T) {
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
output := &strings.Builder{}
|
output := &strings.Builder{}
|
||||||
status, err := EscapeControlReader(strings.NewReader(tt.text), output, &translation.MockLocale{})
|
status, err := EscapeControlReader(strings.NewReader(tt.text), output, &translation.MockLocale{}, testContext)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Equal(t, tt.status, *status)
|
assert.Equal(t, tt.status, *status)
|
||||||
assert.Equal(t, tt.result, output.String())
|
assert.Equal(t, tt.result, output.String())
|
||||||
|
@ -169,9 +172,22 @@ func TestEscapeControlReader(t *testing.T) {
|
||||||
|
|
||||||
func TestSettingAmbiguousUnicodeDetection(t *testing.T) {
|
func TestSettingAmbiguousUnicodeDetection(t *testing.T) {
|
||||||
defer test.MockVariableValue(&setting.UI.AmbiguousUnicodeDetection, true)()
|
defer test.MockVariableValue(&setting.UI.AmbiguousUnicodeDetection, true)()
|
||||||
_, out := EscapeControlHTML("a test", &translation.MockLocale{})
|
|
||||||
|
_, out := EscapeControlHTML("a test", &translation.MockLocale{}, testContext)
|
||||||
assert.EqualValues(t, `a<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char"> </span></span>test`, out)
|
assert.EqualValues(t, `a<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char"> </span></span>test`, out)
|
||||||
setting.UI.AmbiguousUnicodeDetection = false
|
setting.UI.AmbiguousUnicodeDetection = false
|
||||||
_, out = EscapeControlHTML("a test", &translation.MockLocale{})
|
_, out = EscapeControlHTML("a test", &translation.MockLocale{}, testContext)
|
||||||
assert.EqualValues(t, `a test`, out)
|
assert.EqualValues(t, `a test`, out)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestAmbiguousUnicodeDetectionContext(t *testing.T) {
|
||||||
|
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"test"})()
|
||||||
|
|
||||||
|
input := template.HTML("a test")
|
||||||
|
|
||||||
|
_, out := EscapeControlHTML(input, &translation.MockLocale{}, escapeContext("not-test"))
|
||||||
|
assert.EqualValues(t, `a<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char"> </span></span>test`, out)
|
||||||
|
|
||||||
|
_, out = EscapeControlHTML(input, &translation.MockLocale{}, testContext)
|
||||||
|
assert.EqualValues(t, input, out)
|
||||||
|
}
|
||||||
|
|
|
@ -38,6 +38,7 @@ var UI = struct {
|
||||||
PreferredTimestampTense string
|
PreferredTimestampTense string
|
||||||
|
|
||||||
AmbiguousUnicodeDetection bool
|
AmbiguousUnicodeDetection bool
|
||||||
|
SkipEscapeContexts []string
|
||||||
|
|
||||||
Notification struct {
|
Notification struct {
|
||||||
MinTimeout time.Duration
|
MinTimeout time.Duration
|
||||||
|
@ -89,6 +90,7 @@ var UI = struct {
|
||||||
PreferredTimestampTense: "mixed",
|
PreferredTimestampTense: "mixed",
|
||||||
|
|
||||||
AmbiguousUnicodeDetection: true,
|
AmbiguousUnicodeDetection: true,
|
||||||
|
SkipEscapeContexts: []string{},
|
||||||
|
|
||||||
Notification: struct {
|
Notification: struct {
|
||||||
MinTimeout time.Duration
|
MinTimeout time.Duration
|
||||||
|
|
|
@ -279,7 +279,7 @@ func renderBlame(ctx *context.Context, blameParts []*git.BlamePart, commitNames
|
||||||
lexerName = lexerNameForLine
|
lexerName = lexerNameForLine
|
||||||
}
|
}
|
||||||
|
|
||||||
br.EscapeStatus, br.Code = charset.EscapeControlHTML(line, ctx.Locale)
|
br.EscapeStatus, br.Code = charset.EscapeControlHTML(line, ctx.Locale, charset.FileviewContext)
|
||||||
rows = append(rows, br)
|
rows = append(rows, br)
|
||||||
escapeStatus = escapeStatus.Or(br.EscapeStatus)
|
escapeStatus = escapeStatus.Or(br.EscapeStatus)
|
||||||
}
|
}
|
||||||
|
|
|
@ -307,7 +307,7 @@ func LFSFileGet(ctx *context.Context) {
|
||||||
|
|
||||||
// Building code view blocks with line number on server side.
|
// Building code view blocks with line number on server side.
|
||||||
escapedContent := &bytes.Buffer{}
|
escapedContent := &bytes.Buffer{}
|
||||||
ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, escapedContent, ctx.Locale)
|
ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, escapedContent, ctx.Locale, charset.FileviewContext)
|
||||||
|
|
||||||
var output bytes.Buffer
|
var output bytes.Buffer
|
||||||
lines := strings.Split(escapedContent.String(), "\n")
|
lines := strings.Split(escapedContent.String(), "\n")
|
||||||
|
|
|
@ -338,7 +338,7 @@ func renderReadmeFile(ctx *context.Context, subfolder string, readmeFile *git.Tr
|
||||||
log.Error("Read readme content failed: %v", err)
|
log.Error("Read readme content failed: %v", err)
|
||||||
}
|
}
|
||||||
contentEscaped := template.HTMLEscapeString(util.UnsafeBytesToString(content))
|
contentEscaped := template.HTMLEscapeString(util.UnsafeBytesToString(content))
|
||||||
ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(template.HTML(contentEscaped), ctx.Locale)
|
ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(template.HTML(contentEscaped), ctx.Locale, charset.FileviewContext)
|
||||||
}
|
}
|
||||||
|
|
||||||
if !fInfo.isLFSFile && ctx.Repo.CanEnableEditor(ctx, ctx.Doer) {
|
if !fInfo.isLFSFile && ctx.Repo.CanEnableEditor(ctx, ctx.Doer) {
|
||||||
|
@ -572,7 +572,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry) {
|
||||||
status := &charset.EscapeStatus{}
|
status := &charset.EscapeStatus{}
|
||||||
statuses := make([]*charset.EscapeStatus, len(fileContent))
|
statuses := make([]*charset.EscapeStatus, len(fileContent))
|
||||||
for i, line := range fileContent {
|
for i, line := range fileContent {
|
||||||
statuses[i], fileContent[i] = charset.EscapeControlHTML(line, ctx.Locale)
|
statuses[i], fileContent[i] = charset.EscapeControlHTML(line, ctx.Locale, charset.FileviewContext)
|
||||||
status = status.Or(statuses[i])
|
status = status.Or(statuses[i])
|
||||||
}
|
}
|
||||||
ctx.Data["EscapeStatus"] = status
|
ctx.Data["EscapeStatus"] = status
|
||||||
|
@ -678,7 +678,7 @@ func markupRender(ctx *context.Context, renderCtx *markup.RenderContext, input i
|
||||||
go func() {
|
go func() {
|
||||||
sb := &strings.Builder{}
|
sb := &strings.Builder{}
|
||||||
// We allow NBSP here this is rendered
|
// We allow NBSP here this is rendered
|
||||||
escaped, _ = charset.EscapeControlReader(markupRd, sb, ctx.Locale, charset.RuneNBSP)
|
escaped, _ = charset.EscapeControlReader(markupRd, sb, ctx.Locale, charset.FileviewContext, charset.RuneNBSP)
|
||||||
output = template.HTML(sb.String())
|
output = template.HTML(sb.String())
|
||||||
close(done)
|
close(done)
|
||||||
}()
|
}()
|
||||||
|
|
|
@ -254,7 +254,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) {
|
||||||
done := make(chan struct{})
|
done := make(chan struct{})
|
||||||
go func() {
|
go func() {
|
||||||
// We allow NBSP here this is rendered
|
// We allow NBSP here this is rendered
|
||||||
escaped, _ = charset.EscapeControlReader(markupRd, buf, ctx.Locale, charset.RuneNBSP)
|
escaped, _ = charset.EscapeControlReader(markupRd, buf, ctx.Locale, charset.WikiContext, charset.RuneNBSP)
|
||||||
output = buf.String()
|
output = buf.String()
|
||||||
buf.Reset()
|
buf.Reset()
|
||||||
close(done)
|
close(done)
|
||||||
|
|
|
@ -284,14 +284,14 @@ type DiffInline struct {
|
||||||
|
|
||||||
// DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped
|
// DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped
|
||||||
func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline {
|
func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline {
|
||||||
status, content := charset.EscapeControlHTML(s, locale)
|
status, content := charset.EscapeControlHTML(s, locale, charset.DiffContext)
|
||||||
return DiffInline{EscapeStatus: status, Content: content}
|
return DiffInline{EscapeStatus: status, Content: content}
|
||||||
}
|
}
|
||||||
|
|
||||||
// DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped
|
// DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped
|
||||||
func DiffInlineWithHighlightCode(fileName, language, code string, locale translation.Locale) DiffInline {
|
func DiffInlineWithHighlightCode(fileName, language, code string, locale translation.Locale) DiffInline {
|
||||||
highlighted, _ := highlight.Code(fileName, language, code)
|
highlighted, _ := highlight.Code(fileName, language, code)
|
||||||
status, content := charset.EscapeControlHTML(highlighted, locale)
|
status, content := charset.EscapeControlHTML(highlighted, locale, charset.DiffContext)
|
||||||
return DiffInline{EscapeStatus: status, Content: content}
|
return DiffInline{EscapeStatus: status, Content: content}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,8 +5,16 @@ package integration
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
unit_model "code.gitea.io/gitea/models/unit"
|
||||||
|
"code.gitea.io/gitea/models/unittest"
|
||||||
|
user_model "code.gitea.io/gitea/models/user"
|
||||||
|
"code.gitea.io/gitea/modules/setting"
|
||||||
|
"code.gitea.io/gitea/modules/test"
|
||||||
|
files_service "code.gitea.io/gitea/services/repository/files"
|
||||||
"code.gitea.io/gitea/tests"
|
"code.gitea.io/gitea/tests"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
|
@ -25,3 +33,99 @@ func TestRenderFileSVGIsInImgTag(t *testing.T) {
|
||||||
assert.True(t, exists, "The SVG image should be in an <img> tag so that scripts in the SVG are not run")
|
assert.True(t, exists, "The SVG image should be in an <img> tag so that scripts in the SVG are not run")
|
||||||
assert.Equal(t, "/user2/repo2/raw/branch/master/line.svg", src)
|
assert.Equal(t, "/user2/repo2/raw/branch/master/line.svg", src)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestAmbiguousCharacterDetection(t *testing.T) {
|
||||||
|
onGiteaRun(t, func(t *testing.T, u *url.URL) {
|
||||||
|
user2 := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2})
|
||||||
|
session := loginUser(t, user2.Name)
|
||||||
|
|
||||||
|
// Prepare the environments. File view, commit view (diff), wiki page.
|
||||||
|
repo, commitID, f := CreateDeclarativeRepo(t, user2, "",
|
||||||
|
[]unit_model.Type{unit_model.TypeCode, unit_model.TypeWiki}, nil,
|
||||||
|
[]*files_service.ChangeRepoFile{
|
||||||
|
{
|
||||||
|
Operation: "create",
|
||||||
|
TreePath: "test.sh",
|
||||||
|
ContentReader: strings.NewReader("Hello there!\nline western"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
defer f()
|
||||||
|
|
||||||
|
req := NewRequestWithValues(t, "POST", repo.Link()+"/wiki?action=new", map[string]string{
|
||||||
|
"_csrf": GetCSRF(t, session, repo.Link()+"/wiki?action=new"),
|
||||||
|
"title": "Normal",
|
||||||
|
"content": "Hello – Hello",
|
||||||
|
})
|
||||||
|
session.MakeRequest(t, req, http.StatusSeeOther)
|
||||||
|
|
||||||
|
assertCase := func(t *testing.T, fileContext, commitContext, wikiContext bool) {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
t.Run("File context", func(t *testing.T) {
|
||||||
|
defer tests.PrintCurrentTest(t)()
|
||||||
|
|
||||||
|
req := NewRequest(t, "GET", repo.Link()+"/src/branch/main/test.sh")
|
||||||
|
resp := session.MakeRequest(t, req, http.StatusOK)
|
||||||
|
|
||||||
|
htmlDoc := NewHTMLParser(t, resp.Body)
|
||||||
|
htmlDoc.AssertElement(t, ".unicode-escape-prompt", fileContext)
|
||||||
|
})
|
||||||
|
t.Run("Commit context", func(t *testing.T) {
|
||||||
|
defer tests.PrintCurrentTest(t)()
|
||||||
|
|
||||||
|
req := NewRequest(t, "GET", repo.Link()+"/commit/"+commitID)
|
||||||
|
resp := session.MakeRequest(t, req, http.StatusOK)
|
||||||
|
|
||||||
|
htmlDoc := NewHTMLParser(t, resp.Body)
|
||||||
|
htmlDoc.AssertElement(t, ".lines-escape .toggle-escape-button", commitContext)
|
||||||
|
})
|
||||||
|
t.Run("Wiki context", func(t *testing.T) {
|
||||||
|
defer tests.PrintCurrentTest(t)()
|
||||||
|
|
||||||
|
req := NewRequest(t, "GET", repo.Link()+"/wiki/Normal")
|
||||||
|
resp := session.MakeRequest(t, req, http.StatusOK)
|
||||||
|
|
||||||
|
htmlDoc := NewHTMLParser(t, resp.Body)
|
||||||
|
htmlDoc.AssertElement(t, ".unicode-escape-prompt", wikiContext)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Run("Enabled all context", func(t *testing.T) {
|
||||||
|
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{})()
|
||||||
|
|
||||||
|
assertCase(t, true, true, true)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Enabled file context", func(t *testing.T) {
|
||||||
|
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"diff", "wiki"})()
|
||||||
|
|
||||||
|
assertCase(t, true, false, false)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Enabled commit context", func(t *testing.T) {
|
||||||
|
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"file-view", "wiki"})()
|
||||||
|
|
||||||
|
assertCase(t, false, true, false)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Enabled wiki context", func(t *testing.T) {
|
||||||
|
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"file-view", "diff"})()
|
||||||
|
|
||||||
|
assertCase(t, false, false, true)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("No context", func(t *testing.T) {
|
||||||
|
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"file-view", "wiki", "diff"})()
|
||||||
|
|
||||||
|
assertCase(t, false, false, false)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Disabled detection", func(t *testing.T) {
|
||||||
|
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{})()
|
||||||
|
defer test.MockVariableValue(&setting.UI.AmbiguousUnicodeDetection, false)()
|
||||||
|
|
||||||
|
assertCase(t, false, false, false)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue