// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package template import ( "bytes" "fmt" "html" "io" "text/template" "text/template/parse" ) // escapeTemplate rewrites the named template, which must be // associated with t, to guarantee that the output of any of the named // templates is properly escaped. If no error is returned, then the named templates have // been modified. Otherwise the named templates have been rendered // unusable. func escapeTemplate(tmpl *Template, node parse.Node, name string) error { c, _ := tmpl.esc.escapeTree(context{}, node, name, 0) var err error if c.err != nil { err, c.err.Name = c.err, name } else if c.state != stateText { err = &Error{ErrEndContext, nil, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)} } if err != nil { // Prevent execution of unsafe templates. if t := tmpl.set[name]; t != nil { t.escapeErr = err t.text.Tree = nil t.Tree = nil } return err } tmpl.esc.commit() if t := tmpl.set[name]; t != nil { t.escapeErr = escapeOK t.Tree = t.text.Tree } return nil } // evalArgs formats the list of arguments into a string. It is equivalent to // fmt.Sprint(args...), except that it deferences all pointers. func evalArgs(args ...interface{}) string { // Optimization for simple common case of a single string argument. if len(args) == 1 { if s, ok := args[0].(string); ok { return s } } for i, arg := range args { args[i] = indirectToStringerOrError(arg) } return fmt.Sprint(args...) } // funcMap maps command names to functions that render their inputs safe. 
var funcMap = template.FuncMap{
	"_html_template_attrescaper":     attrEscaper,
	"_html_template_commentescaper":  commentEscaper,
	"_html_template_cssescaper":      cssEscaper,
	"_html_template_cssvaluefilter":  cssValueFilter,
	"_html_template_htmlnamefilter":  htmlNameFilter,
	"_html_template_htmlescaper":     htmlEscaper,
	"_html_template_jsregexpescaper": jsRegexpEscaper,
	"_html_template_jsstrescaper":    jsStrEscaper,
	"_html_template_jsvalescaper":    jsValEscaper,
	"_html_template_nospaceescaper":  htmlNospaceEscaper,
	"_html_template_rcdataescaper":   rcdataEscaper,
	"_html_template_srcsetescaper":   srcsetFilterAndEscaper,
	"_html_template_urlescaper":      urlEscaper,
	"_html_template_urlfilter":       urlFilter,
	"_html_template_urlnormalizer":   urlNormalizer,
	// _eval_args_ is the name ensurePipelineContains substitutes for a
	// predefined escaper that was invoked with explicit arguments.
	"_eval_args_": evalArgs,
}

// escaper collects type inferences about templates and changes needed to make
// templates injection safe.
//
// The field order matters: makeEscaper fills this struct with an unkeyed
// composite literal.
type escaper struct {
	// ns is the nameSpace that this escaper is associated with.
	ns *nameSpace
	// output[templateName] is the output context for a templateName that
	// has been mangled to include its input context.
	output map[string]context
	// derived[c.mangle(name)] maps to a template derived from the template
	// named name for the start context c.
	derived map[string]*template.Template
	// called[templateName] is a set of called mangled template names.
	called map[string]bool
	// xxxNodeEdits are the accumulated edits to apply during commit.
	// Such edits are not applied immediately in case a template set
	// executes a given template in different escaping contexts.
	actionNodeEdits   map[*parse.ActionNode][]string
	templateNodeEdits map[*parse.TemplateNode]string
	textNodeEdits     map[*parse.TextNode][]byte
}

// makeEscaper creates a blank escaper for the given set.
func makeEscaper(n *nameSpace) escaper {
	// The literal is unkeyed, so the values below must stay in the same
	// order as the fields of the escaper struct.
	return escaper{
		n,
		map[string]context{},
		map[string]*template.Template{},
		map[string]bool{},
		map[*parse.ActionNode][]string{},
		map[*parse.TemplateNode]string{},
		map[*parse.TextNode][]byte{},
	}
}

// filterFailsafe is an innocuous word that is emitted in place of unsafe values
// by sanitizer functions. It is not a keyword in any programming language,
// contains no special characters, is not empty, and when it appears in output
// it is distinct enough that a developer can find the source of the problem
// via a search engine.
const filterFailsafe = "ZgotmplZ"

// escape escapes a template node by dispatching on its concrete type and
// returns the context produced by the matching handler. Comment nodes leave
// the context unchanged. It panics for any node type not listed in the switch.
func (e *escaper) escape(c context, n parse.Node) context {
	switch n := n.(type) {
	case *parse.ActionNode:
		return e.escapeAction(c, n)
	case *parse.CommentNode:
		return c
	case *parse.IfNode:
		return e.escapeBranch(c, &n.BranchNode, "if")
	case *parse.ListNode:
		return e.escapeList(c, n)
	case *parse.RangeNode:
		return e.escapeBranch(c, &n.BranchNode, "range")
	case *parse.TemplateNode:
		return e.escapeTemplate(c, n)
	case *parse.TextNode:
		return e.escapeText(c, n)
	case *parse.WithNode:
		return e.escapeBranch(c, &n.BranchNode, "with")
	}
	panic("escaping " + n.String() + " is unimplemented")
}

// escapeAction escapes an action template node.
//
// It selects the escaper function names appropriate for context c, records
// them for the node via e.editActionNode, and returns the context that holds
// after the action. Pipelines that declare variables are returned unchanged.
// A context in stateError is returned when a predefined escaper is used
// anywhere but as the last command, when "html" is used in an unquoted
// attribute value, or when the action sits in an ambiguous part of a URL.
func (e *escaper) escapeAction(c context, n *parse.ActionNode) context {
	if len(n.Pipe.Decl) != 0 {
		// A local variable assignment, not an interpolation.
		return c
	}
	c = nudge(c)
	// Check for disallowed use of predefined escapers in the pipeline.
	for pos, idNode := range n.Pipe.Cmds {
		node, ok := idNode.Args[0].(*parse.IdentifierNode)
		if !ok {
			// A predefined escaper "esc" will never be found as an identifier in a
			// Chain or Field node, since:
			// - "esc.x ..." is invalid, since predefined escapers return strings, and
			//   strings do not have methods, keys or fields.
			// - "... .esc" is invalid, since predefined escapers are global functions,
			//   not methods or fields of any types.
			// Therefore, it is safe to ignore these two node types.
			continue
		}
		ident := node.Ident
		if _, ok := predefinedEscapers[ident]; ok {
			// Reject a predefined escaper that is not the final command, and
			// reject "html" inside an unquoted attribute value.
			if pos < len(n.Pipe.Cmds)-1 ||
				c.state == stateAttr && c.delim == delimSpaceOrTagEnd && ident == "html" {
				return context{
					state: stateError,
					err:   errorf(ErrPredefinedEscaper, n, n.Line, "predefined escaper %q disallowed in template", ident),
				}
			}
		}
	}
	// s accumulates, in application order, the names of the escapers to add.
	s := make([]string, 0, 3)
	switch c.state {
	case stateError:
		return c
	case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL:
		switch c.urlPart {
		case urlPartNone:
			// At the start of a URL: add the URL filter first, then fall
			// through to the same handling as the pre-query part.
			s = append(s, "_html_template_urlfilter")
			fallthrough
		case urlPartPreQuery:
			switch c.state {
			case stateCSSDqStr, stateCSSSqStr:
				s = append(s, "_html_template_cssescaper")
			default:
				s = append(s, "_html_template_urlnormalizer")
			}
		case urlPartQueryOrFrag:
			s = append(s, "_html_template_urlescaper")
		case urlPartUnknown:
			return context{
				state: stateError,
				err:   errorf(ErrAmbigContext, n, n.Line, "%s appears in an ambiguous context within a URL", n),
			}
		default:
			panic(c.urlPart.String())
		}
	case stateJS:
		s = append(s, "_html_template_jsvalescaper")
		// A slash after a value starts a div operator.
		c.jsCtx = jsCtxDivOp
	case stateJSDqStr, stateJSSqStr:
		s = append(s, "_html_template_jsstrescaper")
	case stateJSRegexp:
		s = append(s, "_html_template_jsregexpescaper")
	case stateCSS:
		s = append(s, "_html_template_cssvaluefilter")
	case stateText:
		s = append(s, "_html_template_htmlescaper")
	case stateRCDATA:
		s = append(s, "_html_template_rcdataescaper")
	case stateAttr:
		// Handled below in delim check.
	case stateAttrName, stateTag:
		c.state = stateAttrName
		s = append(s, "_html_template_htmlnamefilter")
	case stateSrcset:
		s = append(s, "_html_template_srcsetescaper")
	default:
		if isComment(c.state) {
			s = append(s, "_html_template_commentescaper")
		} else {
			panic("unexpected state " + c.state.String())
		}
	}
	// Add an attribute-level escaper on top, depending on how the enclosing
	// attribute value is delimited.
	switch c.delim {
	case delimNone:
		// No extra-escaping needed for raw text content.
	case delimSpaceOrTagEnd:
		s = append(s, "_html_template_nospaceescaper")
	default:
		s = append(s, "_html_template_attrescaper")
	}
	e.editActionNode(n, s)
	return c
}

// ensurePipelineContains ensures that the pipeline ends with the commands with
// the identifiers in s in order. If the pipeline ends with a predefined escaper
// (i.e. "html" or "urlquery"), merge it with the identifiers in s.
//
// Both arguments are modified: p.Cmds is replaced with the rewritten command
// list, and entries of s that are equivalent to the trailing predefined
// escaper are overwritten with that escaper's name.
func ensurePipelineContains(p *parse.PipeNode, s []string) {
	if len(s) == 0 {
		// Do not rewrite pipeline if we have no escapers to insert.
		return
	}
	// Precondition: p.Cmds contains at most one predefined escaper and the
	// escaper will be present at p.Cmds[len(p.Cmds)-1]. This precondition is
	// always true because of the checks in escapeAction.
	pipelineLen := len(p.Cmds)
	if pipelineLen > 0 {
		lastCmd := p.Cmds[pipelineLen-1]
		if idNode, ok := lastCmd.Args[0].(*parse.IdentifierNode); ok {
			if esc := idNode.Ident; predefinedEscapers[esc] {
				// Pipeline ends with a predefined escaper.
				if len(p.Cmds) == 1 && len(lastCmd.Args) > 1 {
					// Special case: pipeline is of the form {{ esc arg1 arg2 ... argN }},
					// where esc is the predefined escaper, and arg1...argN are its arguments.
					// Convert this into the equivalent form
					// {{ _eval_args_ arg1 arg2 ... argN | esc }}, so that esc can be easily
					// merged with the escapers in s.
					lastCmd.Args[0] = parse.NewIdentifier("_eval_args_").SetTree(nil).SetPos(lastCmd.Args[0].Position())
					p.Cmds = appendCmd(p.Cmds, newIdentCmd(esc, p.Position()))
					pipelineLen++
				}
				// If any of the commands in s that we are about to insert is equivalent
				// to the predefined escaper, use the predefined escaper instead.
				dup := false
				for i, escaper := range s {
					if escFnsEq(esc, escaper) {
						s[i] = idNode.Ident
						dup = true
					}
				}
				if dup {
					// The predefined escaper will already be inserted along with the
					// escapers in s, so do not copy it to the rewritten pipeline.
					pipelineLen--
				}
			}
		}
	}
	// Rewrite the pipeline, creating the escapers in s at the end of the pipeline.
	newCmds := make([]*parse.CommandNode, pipelineLen, pipelineLen+len(s))
	// insertedIdents holds the normalized names of the identifier commands
	// copied over from the existing pipeline.
	insertedIdents := make(map[string]bool)
	for i := 0; i < pipelineLen; i++ {
		cmd := p.Cmds[i]
		newCmds[i] = cmd
		if idNode, ok := cmd.Args[0].(*parse.IdentifierNode); ok {
			insertedIdents[normalizeEscFn(idNode.Ident)] = true
		}
	}
	for _, name := range s {
		if !insertedIdents[normalizeEscFn(name)] {
			// When two templates share an underlying parse tree via the use of
			// AddParseTree and one template is executed after the other, this check
			// ensures that escapers that were already inserted into the pipeline on
			// the first escaping pass do not get inserted again.
			newCmds = appendCmd(newCmds, newIdentCmd(name, p.Position()))
		}
	}
	p.Cmds = newCmds
}

// predefinedEscapers contains template predefined escapers that are equivalent
// to some contextual escapers. Keep in sync with equivEscapers.
var predefinedEscapers = map[string]bool{
	"html":     true,
	"urlquery": true,
}

// equivEscapers matches contextual escapers to equivalent predefined
// template escapers.
var equivEscapers = map[string]string{
	// The following pairs of HTML escapers provide equivalent security
	// guarantees, since they all escape '\000', '\'', '"', '&', '<', and '>'.
	"_html_template_attrescaper":   "html",
	"_html_template_htmlescaper":   "html",
	"_html_template_rcdataescaper": "html",
	// These two URL escapers produce URLs safe for embedding in a URL query by
	// percent-encoding all the reserved characters specified in RFC 3986 Section
	// 2.2
	"_html_template_urlescaper": "urlquery",
	// These two functions are not actually equivalent; urlquery is stricter as it
	// escapes reserved characters (e.g. '#'), while _html_template_urlnormalizer
	// does not. It is therefore only safe to replace _html_template_urlnormalizer
	// with urlquery (this happens in ensurePipelineContains), but not the other
	// way around. We keep this entry around to preserve the behavior of templates
	// written before Go 1.9, which might depend on this substitution taking place.
	"_html_template_urlnormalizer": "urlquery",
}

// escFnsEq reports whether the two escaping functions are equivalent.
// Two names are equivalent when normalizeEscFn maps them to the same string.
func escFnsEq(a, b string) bool {
	return normalizeEscFn(a) == normalizeEscFn(b)
}

// normalizeEscFn(a) is equal to normalizeEscFn(b) for any pair of names of
// escaper functions a and b that are equivalent.
// Names with an entry in equivEscapers normalize to that entry's value; any
// other name normalizes to itself.
func normalizeEscFn(e string) string {
	if norm := equivEscapers[e]; norm != "" {
		return norm
	}
	return e
}

// redundantFuncs[a][b] implies that funcMap[b](funcMap[a](x)) == funcMap[a](x)
// for all x.
var redundantFuncs = map[string]map[string]bool{
	"_html_template_commentescaper": {
		"_html_template_attrescaper":    true,
		"_html_template_nospaceescaper": true,
		"_html_template_htmlescaper":    true,
	},
	"_html_template_cssescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_jsregexpescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_jsstrescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_urlescaper": {
		"_html_template_urlnormalizer": true,
	},
}

// appendCmd appends the given command to the end of the command pipeline
// unless it is redundant with the last command.
// Redundancy is looked up in redundantFuncs and is only considered when the
// first argument of both commands is an identifier node.
func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode {
	if n := len(cmds); n != 0 {
		last, okLast := cmds[n-1].Args[0].(*parse.IdentifierNode)
		next, okNext := cmd.Args[0].(*parse.IdentifierNode)
		if okLast && okNext && redundantFuncs[last.Ident][next.Ident] {
			return cmds
		}
	}
	return append(cmds, cmd)
}

// newIdentCmd produces a command containing a single identifier node.
// The identifier node is given position pos and a nil tree.
func newIdentCmd(identifier string, pos parse.Pos) *parse.CommandNode {
	return &parse.CommandNode{
		NodeType: parse.NodeCommand,
		Args:     []parse.Node{parse.NewIdentifier(identifier).SetTree(nil).SetPos(pos)}, // TODO: SetTree.
	}
}

// NOTE(review): the line below appears to be extraction residue (a truncated
// nudge doc comment, the tail of another function, and the start of
// delimEnds); it is preserved verbatim because the missing text cannot be
// reconstructed from this view.
// nudge returns the context that would result from following empty string // transitions from the input context. // For example, parsing: // `90% of the time. e.output[t.Name()] = c return e.escapeListConditionally(c, t.Tree.Root, filter) } // delimEnds maps each delim to a string of characters that terminate it. var delimEnds = [...]string{ delimDoubleQuote: `"`, delimSingleQuote: "'", // Determined empirically by running the below in various browsers. // var div = document.createElement("DIV"); // for (var i = 0; i < 0x10000; ++i) { // div.innerHTML = ""; // if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0) // document.write("

U+" + i.toString(16)); // } delimSpaceOrTagEnd: " \t\n\f\r>", } var doctypeBytes = []byte("= i; j-- { if s[j] == '<' { end = j break } } } for j := i; j < end; j++ { if s[j] == '<' && !bytes.HasPrefix(bytes.ToUpper(s[j:]), doctypeBytes) { b.Write(s[written:j]) b.WriteString("<") written = j + 1 } } } else if isComment(c.state) && c.delim == delimNone { switch c.state { case stateJSBlockCmt: // https://es5.github.com/#x7.4: // "Comments behave like white space and are // discarded except that, if a MultiLineComment // contains a line terminator character, then // the entire comment is considered to be a // LineTerminator for purposes of parsing by // the syntactic grammar." if bytes.ContainsAny(s[written:i1], "\n\r\u2028\u2029") { b.WriteByte('\n') } else { b.WriteByte(' ') } case stateCSSBlockCmt: b.WriteByte(' ') } written = i1 } if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone { // Preserve the portion between written and the comment start. cs := i1 - 2 if c1.state == stateHTMLCmt { // "