Best K6 code snippet using html.Scope
parse.go
Source:parse.go
...53 return p.doc54}55// Stop tags for use in popUntil. These come from section 12.2.4.2.56var (57 defaultScopeStopTags = map[string][]a.Atom{58 "": {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},59 "math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},60 "svg": {a.Desc, a.ForeignObject, a.Title},61 }62)63type scope int64const (65 defaultScope scope = iota66 listItemScope67 buttonScope68 tableScope69 tableRowScope70 tableBodyScope71 selectScope72)73// popUntil pops the stack of open elements at the highest element whose tag74// is in matchTags, provided there is no higher element in the scope's stop75// tags (as defined in section 12.2.4.2). It returns whether or not there was76// such an element. If there was not, popUntil leaves the stack unchanged.77//78// For example, the set of stop tags for table scope is: "html", "table". If79// the stack was:80// ["html", "body", "font", "table", "b", "i", "u"]81// then popUntil(tableScope, "font") would return false, but82// popUntil(tableScope, "i") would return true and the stack would become:83// ["html", "body", "font", "table", "b"]84//85// If an element's tag is in both the stop tags and matchTags, then the stack86// will be popped and the function returns true (provided, of course, there was87// no higher element in the stack that was also in the stop tags). For example,88// popUntil(tableScope, "table") returns true and leaves:89// ["html", "body", "font"]90func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {91 if i := p.indexOfElementInScope(s, matchTags...); i != -1 {92 p.oe = p.oe[:i]93 return true94 }95 return false96}97// indexOfElementInScope returns the index in p.oe of the highest element whose98// tag is in matchTags that is in scope. 
If no matching element is in scope, it99// returns -1.100func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {101 for i := len(p.oe) - 1; i >= 0; i-- {102 tagAtom := p.oe[i].DataAtom103 if p.oe[i].Namespace == "" {104 for _, t := range matchTags {105 if t == tagAtom {106 return i107 }108 }109 switch s {110 case defaultScope:111 // No-op.112 case listItemScope:113 if tagAtom == a.Ol || tagAtom == a.Ul {114 return -1115 }116 case buttonScope:117 if tagAtom == a.Button {118 return -1119 }120 case tableScope:121 if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {122 return -1123 }124 case selectScope:125 if tagAtom != a.Optgroup && tagAtom != a.Option {126 return -1127 }128 default:129 panic("unreachable")130 }131 }132 switch s {133 case defaultScope, listItemScope, buttonScope:134 for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {135 if t == tagAtom {136 return -1137 }138 }139 }140 }141 return -1142}143// elementInScope is like popUntil, except that it doesn't modify the stack of144// open elements.145func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {146 return p.indexOfElementInScope(s, matchTags...) 
!= -1147}148// clearStackToContext pops elements off the stack of open elements until a149// scope-defined element is found.150func (p *parser) clearStackToContext(s scope) {151 for i := len(p.oe) - 1; i >= 0; i-- {152 tagAtom := p.oe[i].DataAtom153 switch s {154 case tableScope:155 if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {156 p.oe = p.oe[:i+1]157 return158 }159 case tableRowScope:160 if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {161 p.oe = p.oe[:i+1]162 return163 }164 case tableBodyScope:165 if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {166 p.oe = p.oe[:i+1]167 return168 }169 default:170 panic("unreachable")171 }172 }173}174// parseGenericRawTextElements implements the generic raw text element parsing175// algorithm defined in 12.2.6.2.176// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text177// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part178// officially, need to make tokenizer consider both states.179func (p *parser) parseGenericRawTextElement() {180 p.addElement()181 p.originalIM = p.im182 p.im = textIM183}184// generateImpliedEndTags pops nodes off the stack of open elements as long as185// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.186// If exceptions are specified, nodes with that name will not be popped off.187func (p *parser) generateImpliedEndTags(exceptions ...string) {188 var i int189loop:190 for i = len(p.oe) - 1; i >= 0; i-- {191 n := p.oe[i]192 if n.Type != ElementNode {193 break194 }195 switch n.DataAtom {196 case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:197 for _, except := range exceptions {198 if n.Data == except {199 break loop200 }201 }202 continue203 }204 break205 }206 p.oe = p.oe[:i+1]207}208// addChild adds a child node n to the top element, and pushes n onto the stack209// of open elements 
if it is an element node.210func (p *parser) addChild(n *Node) {211 if p.shouldFosterParent() {212 p.fosterParent(n)213 } else {214 p.top().AppendChild(n)215 }216 if n.Type == ElementNode {217 p.oe = append(p.oe, n)218 }219}220// shouldFosterParent returns whether the next node to be added should be221// foster parented.222func (p *parser) shouldFosterParent() bool {223 if p.fosterParenting {224 switch p.top().DataAtom {225 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:226 return true227 }228 }229 return false230}231// fosterParent adds a child node according to the foster parenting rules.232// Section 12.2.6.1, "foster parenting".233func (p *parser) fosterParent(n *Node) {234 var table, parent, prev, template *Node235 var i int236 for i = len(p.oe) - 1; i >= 0; i-- {237 if p.oe[i].DataAtom == a.Table {238 table = p.oe[i]239 break240 }241 }242 var j int243 for j = len(p.oe) - 1; j >= 0; j-- {244 if p.oe[j].DataAtom == a.Template {245 template = p.oe[j]246 break247 }248 }249 if template != nil && (table == nil || j > i) {250 template.AppendChild(n)251 return252 }253 if table == nil {254 // The foster parent is the html element.255 parent = p.oe[0]256 } else {257 parent = table.Parent258 }259 if parent == nil {260 parent = p.oe[i-1]261 }262 if table != nil {263 prev = table.PrevSibling264 } else {265 prev = parent.LastChild266 }267 if prev != nil && prev.Type == TextNode && n.Type == TextNode {268 prev.Data += n.Data269 return270 }271 parent.InsertBefore(n, table)272}273// addText adds text to the preceding node if it is a text node, or else it274// calls addChild with a new text node.275func (p *parser) addText(text string) {276 if text == "" {277 return278 }279 if p.shouldFosterParent() {280 p.fosterParent(&Node{281 Type: TextNode,282 Data: text,283 })284 return285 }286 t := p.top()287 if n := t.LastChild; n != nil && n.Type == TextNode {288 n.Data += text289 return290 }291 p.addChild(&Node{292 Type: TextNode,293 Data: text,294 })295}296// addElement adds a child 
element based on the current token.297func (p *parser) addElement() {298 p.addChild(&Node{299 Type: ElementNode,300 DataAtom: p.tok.DataAtom,301 Data: p.tok.Data,302 Attr: p.tok.Attr,303 })304}305// Section 12.2.4.3.306func (p *parser) addFormattingElement() {307 tagAtom, attr := p.tok.DataAtom, p.tok.Attr308 p.addElement()309 // Implement the Noah's Ark clause, but with three per family instead of two.310 identicalElements := 0311findIdenticalElements:312 for i := len(p.afe) - 1; i >= 0; i-- {313 n := p.afe[i]314 if n.Type == scopeMarkerNode {315 break316 }317 if n.Type != ElementNode {318 continue319 }320 if n.Namespace != "" {321 continue322 }323 if n.DataAtom != tagAtom {324 continue325 }326 if len(n.Attr) != len(attr) {327 continue328 }329 compareAttributes:330 for _, t0 := range n.Attr {331 for _, t1 := range attr {332 if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {333 // Found a match for this attribute, continue with the next attribute.334 continue compareAttributes335 }336 }337 // If we get here, there is no attribute that matches a.338 // Therefore the element is not identical to the new one.339 continue findIdenticalElements340 }341 identicalElements++342 if identicalElements >= 3 {343 p.afe.remove(n)344 }345 }346 p.afe = append(p.afe, p.top())347}348// Section 12.2.4.3.349func (p *parser) clearActiveFormattingElements() {350 for {351 if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {352 return353 }354 }355}356// Section 12.2.4.3.357func (p *parser) reconstructActiveFormattingElements() {358 n := p.afe.top()359 if n == nil {360 return361 }362 if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {363 return364 }365 i := len(p.afe) - 1366 for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {367 if i == 0 {368 i = -1369 break370 }371 i--372 n = p.afe[i]373 }374 for {375 i++376 clone := p.afe[i].clone()377 p.addChild(clone)378 p.afe[i] = clone379 if i == len(p.afe)-1 {380 break381 }382 }383}384// Section 
12.2.5.385func (p *parser) acknowledgeSelfClosingTag() {386 p.hasSelfClosingToken = false387}388// An insertion mode (section 12.2.4.1) is the state transition function from389// a particular state in the HTML5 parser's state machine. It updates the390// parser's fields depending on parser.tok (where ErrorToken means EOF).391// It returns whether the token was consumed.392type insertionMode func(*parser) bool393// setOriginalIM sets the insertion mode to return to after completing a text or394// inTableText insertion mode.395// Section 12.2.4.1, "using the rules for".396func (p *parser) setOriginalIM() {397 if p.originalIM != nil {398 panic("html: bad parser state: originalIM was set twice")399 }400 p.originalIM = p.im401}402// Section 12.2.4.1, "reset the insertion mode".403func (p *parser) resetInsertionMode() {404 for i := len(p.oe) - 1; i >= 0; i-- {405 n := p.oe[i]406 last := i == 0407 if last && p.context != nil {408 n = p.context409 }410 switch n.DataAtom {411 case a.Select:412 if !last {413 for ancestor, first := n, p.oe[0]; ancestor != first; {414 ancestor = p.oe[p.oe.index(ancestor)-1]415 switch ancestor.DataAtom {416 case a.Template:417 p.im = inSelectIM418 return419 case a.Table:420 p.im = inSelectInTableIM421 return422 }423 }424 }425 p.im = inSelectIM426 case a.Td, a.Th:427 // TODO: remove this divergence from the HTML5 spec.428 //429 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668430 p.im = inCellIM431 case a.Tr:432 p.im = inRowIM433 case a.Tbody, a.Thead, a.Tfoot:434 p.im = inTableBodyIM435 case a.Caption:436 p.im = inCaptionIM437 case a.Colgroup:438 p.im = inColumnGroupIM439 case a.Table:440 p.im = inTableIM441 case a.Template:442 // TODO: remove this divergence from the HTML5 spec.443 if n.Namespace != "" {444 continue445 }446 p.im = p.templateStack.top()447 case a.Head:448 // TODO: remove this divergence from the HTML5 spec.449 //450 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668451 p.im = inHeadIM452 case 
a.Body:453 p.im = inBodyIM454 case a.Frameset:455 p.im = inFramesetIM456 case a.Html:457 if p.head == nil {458 p.im = beforeHeadIM459 } else {460 p.im = afterHeadIM461 }462 default:463 if last {464 p.im = inBodyIM465 return466 }467 continue468 }469 return470 }471}472const whitespace = " \t\r\n\f"473// Section 12.2.6.4.1.474func initialIM(p *parser) bool {475 switch p.tok.Type {476 case TextToken:477 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)478 if len(p.tok.Data) == 0 {479 // It was all whitespace, so ignore it.480 return true481 }482 case CommentToken:483 p.doc.AppendChild(&Node{484 Type: CommentNode,485 Data: p.tok.Data,486 })487 return true488 case DoctypeToken:489 n, quirks := parseDoctype(p.tok.Data)490 p.doc.AppendChild(n)491 p.quirks = quirks492 p.im = beforeHTMLIM493 return true494 }495 p.quirks = true496 p.im = beforeHTMLIM497 return false498}499// Section 12.2.6.4.2.500func beforeHTMLIM(p *parser) bool {501 switch p.tok.Type {502 case DoctypeToken:503 // Ignore the token.504 return true505 case TextToken:506 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)507 if len(p.tok.Data) == 0 {508 // It was all whitespace, so ignore it.509 return true510 }511 case StartTagToken:512 if p.tok.DataAtom == a.Html {513 p.addElement()514 p.im = beforeHeadIM515 return true516 }517 case EndTagToken:518 switch p.tok.DataAtom {519 case a.Head, a.Body, a.Html, a.Br:520 p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())521 return false522 default:523 // Ignore the token.524 return true525 }526 case CommentToken:527 p.doc.AppendChild(&Node{528 Type: CommentNode,529 Data: p.tok.Data,530 })531 return true532 }533 p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())534 return false535}536// Section 12.2.6.4.3.537func beforeHeadIM(p *parser) bool {538 switch p.tok.Type {539 case TextToken:540 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)541 if len(p.tok.Data) == 0 {542 // It was all whitespace, so ignore it.543 return true544 }545 case 
StartTagToken:546 switch p.tok.DataAtom {547 case a.Head:548 p.addElement()549 p.head = p.top()550 p.im = inHeadIM551 return true552 case a.Html:553 return inBodyIM(p)554 }555 case EndTagToken:556 switch p.tok.DataAtom {557 case a.Head, a.Body, a.Html, a.Br:558 p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())559 return false560 default:561 // Ignore the token.562 return true563 }564 case CommentToken:565 p.addChild(&Node{566 Type: CommentNode,567 Data: p.tok.Data,568 })569 return true570 case DoctypeToken:571 // Ignore the token.572 return true573 }574 p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())575 return false576}577// Section 12.2.6.4.4.578func inHeadIM(p *parser) bool {579 switch p.tok.Type {580 case TextToken:581 s := strings.TrimLeft(p.tok.Data, whitespace)582 if len(s) < len(p.tok.Data) {583 // Add the initial whitespace to the current node.584 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])585 if s == "" {586 return true587 }588 p.tok.Data = s589 }590 case StartTagToken:591 switch p.tok.DataAtom {592 case a.Html:593 return inBodyIM(p)594 case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:595 p.addElement()596 p.oe.pop()597 p.acknowledgeSelfClosingTag()598 return true599 case a.Noscript:600 if p.scripting {601 p.parseGenericRawTextElement()602 return true603 }604 p.addElement()605 p.im = inHeadNoscriptIM606 // Don't let the tokenizer go into raw text mode when scripting is disabled.607 p.tokenizer.NextIsNotRawText()608 return true609 case a.Script, a.Title:610 p.addElement()611 p.setOriginalIM()612 p.im = textIM613 return true614 case a.Noframes, a.Style:615 p.parseGenericRawTextElement()616 return true617 case a.Head:618 // Ignore the token.619 return true620 case a.Template:621 p.addElement()622 p.afe = append(p.afe, &scopeMarker)623 p.framesetOK = false624 p.im = inTemplateIM625 p.templateStack = append(p.templateStack, inTemplateIM)626 return true627 }628 case EndTagToken:629 switch p.tok.DataAtom {630 case a.Head:631 
p.oe.pop()632 p.im = afterHeadIM633 return true634 case a.Body, a.Html, a.Br:635 p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())636 return false637 case a.Template:638 if !p.oe.contains(a.Template) {639 return true640 }641 // TODO: remove this divergence from the HTML5 spec.642 //643 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668644 p.generateImpliedEndTags()645 for i := len(p.oe) - 1; i >= 0; i-- {646 if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {647 p.oe = p.oe[:i]648 break649 }650 }651 p.clearActiveFormattingElements()652 p.templateStack.pop()653 p.resetInsertionMode()654 return true655 default:656 // Ignore the token.657 return true658 }659 case CommentToken:660 p.addChild(&Node{661 Type: CommentNode,662 Data: p.tok.Data,663 })664 return true665 case DoctypeToken:666 // Ignore the token.667 return true668 }669 p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())670 return false671}672// 12.2.6.4.5.673func inHeadNoscriptIM(p *parser) bool {674 switch p.tok.Type {675 case DoctypeToken:676 // Ignore the token.677 return true678 case StartTagToken:679 switch p.tok.DataAtom {680 case a.Html:681 return inBodyIM(p)682 case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:683 return inHeadIM(p)684 case a.Head, a.Noscript:685 // Ignore the token.686 return true687 }688 case EndTagToken:689 switch p.tok.DataAtom {690 case a.Noscript, a.Br:691 default:692 // Ignore the token.693 return true694 }695 case TextToken:696 s := strings.TrimLeft(p.tok.Data, whitespace)697 if len(s) == 0 {698 // It was all whitespace.699 return inHeadIM(p)700 }701 case CommentToken:702 return inHeadIM(p)703 }704 p.oe.pop()705 if p.top().DataAtom != a.Head {706 panic("html: the new current node will be a head element.")707 }708 p.im = inHeadIM709 if p.tok.DataAtom == a.Noscript {710 return true711 }712 return false713}714// Section 12.2.6.4.6.715func afterHeadIM(p *parser) bool {716 switch p.tok.Type {717 case TextToken:718 s := 
strings.TrimLeft(p.tok.Data, whitespace)719 if len(s) < len(p.tok.Data) {720 // Add the initial whitespace to the current node.721 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])722 if s == "" {723 return true724 }725 p.tok.Data = s726 }727 case StartTagToken:728 switch p.tok.DataAtom {729 case a.Html:730 return inBodyIM(p)731 case a.Body:732 p.addElement()733 p.framesetOK = false734 p.im = inBodyIM735 return true736 case a.Frameset:737 p.addElement()738 p.im = inFramesetIM739 return true740 case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:741 p.oe = append(p.oe, p.head)742 defer p.oe.remove(p.head)743 return inHeadIM(p)744 case a.Head:745 // Ignore the token.746 return true747 }748 case EndTagToken:749 switch p.tok.DataAtom {750 case a.Body, a.Html, a.Br:751 // Drop down to creating an implied <body> tag.752 case a.Template:753 return inHeadIM(p)754 default:755 // Ignore the token.756 return true757 }758 case CommentToken:759 p.addChild(&Node{760 Type: CommentNode,761 Data: p.tok.Data,762 })763 return true764 case DoctypeToken:765 // Ignore the token.766 return true767 }768 p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())769 p.framesetOK = true770 return false771}772// copyAttributes copies attributes of src not found on dst to dst.773func copyAttributes(dst *Node, src Token) {774 if len(src.Attr) == 0 {775 return776 }777 attr := map[string]string{}778 for _, t := range dst.Attr {779 attr[t.Key] = t.Val780 }781 for _, t := range src.Attr {782 if _, ok := attr[t.Key]; !ok {783 dst.Attr = append(dst.Attr, t)784 attr[t.Key] = t.Val785 }786 }787}788// Section 12.2.6.4.7.789func inBodyIM(p *parser) bool {790 switch p.tok.Type {791 case TextToken:792 d := p.tok.Data793 switch n := p.oe.top(); n.DataAtom {794 case a.Pre, a.Listing:795 if n.FirstChild == nil {796 // Ignore a newline at the start of a <pre> block.797 if d != "" && d[0] == '\r' {798 d = d[1:]799 }800 if d != "" && d[0] == '\n' {801 d = d[1:]802 
}803 }804 }805 d = strings.Replace(d, "\x00", "", -1)806 if d == "" {807 return true808 }809 p.reconstructActiveFormattingElements()810 p.addText(d)811 if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {812 // There were non-whitespace characters inserted.813 p.framesetOK = false814 }815 case StartTagToken:816 switch p.tok.DataAtom {817 case a.Html:818 if p.oe.contains(a.Template) {819 return true820 }821 copyAttributes(p.oe[0], p.tok)822 case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:823 return inHeadIM(p)824 case a.Body:825 if p.oe.contains(a.Template) {826 return true827 }828 if len(p.oe) >= 2 {829 body := p.oe[1]830 if body.Type == ElementNode && body.DataAtom == a.Body {831 p.framesetOK = false832 copyAttributes(body, p.tok)833 }834 }835 case a.Frameset:836 if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {837 // Ignore the token.838 return true839 }840 body := p.oe[1]841 if body.Parent != nil {842 body.Parent.RemoveChild(body)843 }844 p.oe = p.oe[:1]845 p.addElement()846 p.im = inFramesetIM847 return true848 case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:849 p.popUntil(buttonScope, a.P)850 p.addElement()851 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:852 p.popUntil(buttonScope, a.P)853 switch n := p.top(); n.DataAtom {854 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:855 p.oe.pop()856 }857 p.addElement()858 case a.Pre, a.Listing:859 p.popUntil(buttonScope, a.P)860 p.addElement()861 // The newline, if any, will be dealt with by the TextToken case.862 p.framesetOK = false863 case a.Form:864 if p.form != nil && !p.oe.contains(a.Template) {865 // Ignore the token866 return true867 }868 p.popUntil(buttonScope, a.P)869 p.addElement()870 if !p.oe.contains(a.Template) {871 p.form = p.top()872 }873 case a.Li:874 
p.framesetOK = false875 for i := len(p.oe) - 1; i >= 0; i-- {876 node := p.oe[i]877 switch node.DataAtom {878 case a.Li:879 p.oe = p.oe[:i]880 case a.Address, a.Div, a.P:881 continue882 default:883 if !isSpecialElement(node) {884 continue885 }886 }887 break888 }889 p.popUntil(buttonScope, a.P)890 p.addElement()891 case a.Dd, a.Dt:892 p.framesetOK = false893 for i := len(p.oe) - 1; i >= 0; i-- {894 node := p.oe[i]895 switch node.DataAtom {896 case a.Dd, a.Dt:897 p.oe = p.oe[:i]898 case a.Address, a.Div, a.P:899 continue900 default:901 if !isSpecialElement(node) {902 continue903 }904 }905 break906 }907 p.popUntil(buttonScope, a.P)908 p.addElement()909 case a.Plaintext:910 p.popUntil(buttonScope, a.P)911 p.addElement()912 case a.Button:913 p.popUntil(defaultScope, a.Button)914 p.reconstructActiveFormattingElements()915 p.addElement()916 p.framesetOK = false917 case a.A:918 for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {919 if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {920 p.inBodyEndTagFormatting(a.A, "a")921 p.oe.remove(n)922 p.afe.remove(n)923 break924 }925 }926 p.reconstructActiveFormattingElements()927 p.addFormattingElement()928 case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:929 p.reconstructActiveFormattingElements()930 p.addFormattingElement()931 case a.Nobr:932 p.reconstructActiveFormattingElements()933 if p.elementInScope(defaultScope, a.Nobr) {934 p.inBodyEndTagFormatting(a.Nobr, "nobr")935 p.reconstructActiveFormattingElements()936 }937 p.addFormattingElement()938 case a.Applet, a.Marquee, a.Object:939 p.reconstructActiveFormattingElements()940 p.addElement()941 p.afe = append(p.afe, &scopeMarker)942 p.framesetOK = false943 case a.Table:944 if !p.quirks {945 p.popUntil(buttonScope, a.P)946 }947 p.addElement()948 p.framesetOK = false949 p.im = inTableIM950 return true951 case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:952 p.reconstructActiveFormattingElements()953 
p.addElement()954 p.oe.pop()955 p.acknowledgeSelfClosingTag()956 if p.tok.DataAtom == a.Input {957 for _, t := range p.tok.Attr {958 if t.Key == "type" {959 if strings.ToLower(t.Val) == "hidden" {960 // Skip setting framesetOK = false961 return true962 }963 }964 }965 }966 p.framesetOK = false967 case a.Param, a.Source, a.Track:968 p.addElement()969 p.oe.pop()970 p.acknowledgeSelfClosingTag()971 case a.Hr:972 p.popUntil(buttonScope, a.P)973 p.addElement()974 p.oe.pop()975 p.acknowledgeSelfClosingTag()976 p.framesetOK = false977 case a.Image:978 p.tok.DataAtom = a.Img979 p.tok.Data = a.Img.String()980 return false981 case a.Textarea:982 p.addElement()983 p.setOriginalIM()984 p.framesetOK = false985 p.im = textIM986 case a.Xmp:987 p.popUntil(buttonScope, a.P)988 p.reconstructActiveFormattingElements()989 p.framesetOK = false990 p.parseGenericRawTextElement()991 case a.Iframe:992 p.framesetOK = false993 p.parseGenericRawTextElement()994 case a.Noembed:995 p.parseGenericRawTextElement()996 case a.Noscript:997 if p.scripting {998 p.parseGenericRawTextElement()999 return true1000 }1001 p.reconstructActiveFormattingElements()1002 p.addElement()1003 // Don't let the tokenizer go into raw text mode when scripting is disabled.1004 p.tokenizer.NextIsNotRawText()1005 case a.Select:1006 p.reconstructActiveFormattingElements()1007 p.addElement()1008 p.framesetOK = false1009 p.im = inSelectIM1010 return true1011 case a.Optgroup, a.Option:1012 if p.top().DataAtom == a.Option {1013 p.oe.pop()1014 }1015 p.reconstructActiveFormattingElements()1016 p.addElement()1017 case a.Rb, a.Rtc:1018 if p.elementInScope(defaultScope, a.Ruby) {1019 p.generateImpliedEndTags()1020 }1021 p.addElement()1022 case a.Rp, a.Rt:1023 if p.elementInScope(defaultScope, a.Ruby) {1024 p.generateImpliedEndTags("rtc")1025 }1026 p.addElement()1027 case a.Math, a.Svg:1028 p.reconstructActiveFormattingElements()1029 if p.tok.DataAtom == a.Math {1030 adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)1031 } 
else {1032 adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)1033 }1034 adjustForeignAttributes(p.tok.Attr)1035 p.addElement()1036 p.top().Namespace = p.tok.Data1037 if p.hasSelfClosingToken {1038 p.oe.pop()1039 p.acknowledgeSelfClosingTag()1040 }1041 return true1042 case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:1043 // Ignore the token.1044 default:1045 p.reconstructActiveFormattingElements()1046 p.addElement()1047 }1048 case EndTagToken:1049 switch p.tok.DataAtom {1050 case a.Body:1051 if p.elementInScope(defaultScope, a.Body) {1052 p.im = afterBodyIM1053 }1054 case a.Html:1055 if p.elementInScope(defaultScope, a.Body) {1056 p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())1057 return false1058 }1059 return true1060 case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:1061 p.popUntil(defaultScope, p.tok.DataAtom)1062 case a.Form:1063 if p.oe.contains(a.Template) {1064 i := p.indexOfElementInScope(defaultScope, a.Form)1065 if i == -1 {1066 // Ignore the token.1067 return true1068 }1069 p.generateImpliedEndTags()1070 if p.oe[i].DataAtom != a.Form {1071 // Ignore the token.1072 return true1073 }1074 p.popUntil(defaultScope, a.Form)1075 } else {1076 node := p.form1077 p.form = nil1078 i := p.indexOfElementInScope(defaultScope, a.Form)1079 if node == nil || i == -1 || p.oe[i] != node {1080 // Ignore the token.1081 return true1082 }1083 p.generateImpliedEndTags()1084 p.oe.remove(node)1085 }1086 case a.P:1087 if !p.elementInScope(buttonScope, a.P) {1088 p.parseImpliedToken(StartTagToken, a.P, a.P.String())1089 }1090 p.popUntil(buttonScope, a.P)1091 case a.Li:1092 p.popUntil(listItemScope, a.Li)1093 case a.Dd, a.Dt:1094 p.popUntil(defaultScope, p.tok.DataAtom)1095 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:1096 
p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)1097 case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:1098 p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)1099 case a.Applet, a.Marquee, a.Object:1100 if p.popUntil(defaultScope, p.tok.DataAtom) {1101 p.clearActiveFormattingElements()1102 }1103 case a.Br:1104 p.tok.Type = StartTagToken1105 return false1106 case a.Template:1107 return inHeadIM(p)1108 default:1109 p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)1110 }1111 case CommentToken:1112 p.addChild(&Node{1113 Type: CommentNode,1114 Data: p.tok.Data,1115 })1116 case ErrorToken:1117 // TODO: remove this divergence from the HTML5 spec.1118 if len(p.templateStack) > 0 {1119 p.im = inTemplateIM1120 return false1121 }1122 for _, e := range p.oe {1123 switch e.DataAtom {1124 case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,1125 a.Thead, a.Tr, a.Body, a.Html:1126 default:1127 return true1128 }1129 }1130 }1131 return true1132}1133func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {1134 // This is the "adoption agency" algorithm, described at1135 // https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency1136 // TODO: this is a fairly literal line-by-line translation of that algorithm.1137 // Once the code successfully parses the comprehensive test suite, we should1138 // refactor this code to be more idiomatic.1139 // Steps 1-21140 if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {1141 p.oe.pop()1142 return1143 }1144 // Steps 3-5. The outer loop.1145 for i := 0; i < 8; i++ {1146 // Step 6. 
Find the formatting element.1147 var formattingElement *Node1148 for j := len(p.afe) - 1; j >= 0; j-- {1149 if p.afe[j].Type == scopeMarkerNode {1150 break1151 }1152 if p.afe[j].DataAtom == tagAtom {1153 formattingElement = p.afe[j]1154 break1155 }1156 }1157 if formattingElement == nil {1158 p.inBodyEndTagOther(tagAtom, tagName)1159 return1160 }1161 // Step 7. Ignore the tag if formatting element is not in the stack of open elements.1162 feIndex := p.oe.index(formattingElement)1163 if feIndex == -1 {1164 p.afe.remove(formattingElement)1165 return1166 }1167 // Step 8. Ignore the tag if formatting element is not in the scope.1168 if !p.elementInScope(defaultScope, tagAtom) {1169 // Ignore the tag.1170 return1171 }1172 // Step 9. This step is omitted because it's just a parse error but no need to return.1173 // Steps 10-11. Find the furthest block.1174 var furthestBlock *Node1175 for _, e := range p.oe[feIndex:] {1176 if isSpecialElement(e) {1177 furthestBlock = e1178 break1179 }1180 }1181 if furthestBlock == nil {1182 e := p.oe.pop()1183 for e != formattingElement {1184 e = p.oe.pop()1185 }1186 p.afe.remove(e)1187 return1188 }1189 // Steps 12-13. Find the common ancestor and bookmark node.1190 commonAncestor := p.oe[feIndex-1]1191 bookmark := p.afe.index(formattingElement)1192 // Step 14. The inner loop. Find the lastNode to reparent.1193 lastNode := furthestBlock1194 node := furthestBlock1195 x := p.oe.index(node)1196 // Step 14.1.1197 j := 01198 for {1199 // Step 14.2.1200 j++1201 // Step. 14.3.1202 x--1203 node = p.oe[x]1204 // Step 14.4. Go to the next step if node is formatting element.1205 if node == formattingElement {1206 break1207 }1208 // Step 14.5. 
Remove node from the list of active formatting elements if1209 // inner loop counter is greater than three and node is in the list of1210 // active formatting elements.1211 if ni := p.afe.index(node); j > 3 && ni > -1 {1212 p.afe.remove(node)1213 // If any element of the list of active formatting elements is removed,1214 // we need to take care whether bookmark should be decremented or not.1215 // This is because the value of bookmark may exceed the size of the1216 // list by removing elements from the list.1217 if ni <= bookmark {1218 bookmark--1219 }1220 continue1221 }1222 // Step 14.6. Continue the next inner loop if node is not in the list of1223 // active formatting elements.1224 if p.afe.index(node) == -1 {1225 p.oe.remove(node)1226 continue1227 }1228 // Step 14.7.1229 clone := node.clone()1230 p.afe[p.afe.index(node)] = clone1231 p.oe[p.oe.index(node)] = clone1232 node = clone1233 // Step 14.8.1234 if lastNode == furthestBlock {1235 bookmark = p.afe.index(node) + 11236 }1237 // Step 14.9.1238 if lastNode.Parent != nil {1239 lastNode.Parent.RemoveChild(lastNode)1240 }1241 node.AppendChild(lastNode)1242 // Step 14.10.1243 lastNode = node1244 }1245 // Step 15. Reparent lastNode to the common ancestor,1246 // or for misnested table nodes, to the foster parent.1247 if lastNode.Parent != nil {1248 lastNode.Parent.RemoveChild(lastNode)1249 }1250 switch commonAncestor.DataAtom {1251 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:1252 p.fosterParent(lastNode)1253 default:1254 commonAncestor.AppendChild(lastNode)1255 }1256 // Steps 16-18. Reparent nodes from the furthest block's children1257 // to a clone of the formatting element.1258 clone := formattingElement.clone()1259 reparentChildren(clone, furthestBlock)1260 furthestBlock.AppendChild(clone)1261 // Step 19. 
Fix up the list of active formatting elements.1262 if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {1263 // Move the bookmark with the rest of the list.1264 bookmark--1265 }1266 p.afe.remove(formattingElement)1267 p.afe.insert(bookmark, clone)1268 // Step 20. Fix up the stack of open elements.1269 p.oe.remove(formattingElement)1270 p.oe.insert(p.oe.index(furthestBlock)+1, clone)1271 }1272}1273// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.1274// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content1275// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign1276func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {1277 for i := len(p.oe) - 1; i >= 0; i-- {1278 // Two element nodes have the same tag if they have the same Data (a1279 // string-typed field). As an optimization, for common HTML tags, each1280 // Data string is assigned a unique, non-zero DataAtom (a uint32-typed1281 // field), since integer comparison is faster than string comparison.1282 // Uncommon (custom) tags get a zero DataAtom.1283 //1284 // The if condition here is equivalent to (p.oe[i].Data == tagName).1285 if (p.oe[i].DataAtom == tagAtom) &&1286 ((tagAtom != 0) || (p.oe[i].Data == tagName)) {1287 p.oe = p.oe[:i]1288 break1289 }1290 if isSpecialElement(p.oe[i]) {1291 break1292 }1293 }1294}1295// Section 12.2.6.4.8.1296func textIM(p *parser) bool {1297 switch p.tok.Type {1298 case ErrorToken:1299 p.oe.pop()1300 case TextToken:1301 d := p.tok.Data1302 if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {1303 // Ignore a newline at the start of a <textarea> block.1304 if d != "" && d[0] == '\r' {1305 d = d[1:]1306 }1307 if d != "" && d[0] == '\n' {1308 d = d[1:]1309 }1310 }1311 if d == "" {1312 return true1313 }1314 p.addText(d)1315 return true1316 case EndTagToken:1317 p.oe.pop()1318 }1319 p.im = p.originalIM1320 p.originalIM = nil1321 return 
p.tok.Type == EndTagToken1322}1323// Section 12.2.6.4.9.1324func inTableIM(p *parser) bool {1325 switch p.tok.Type {1326 case TextToken:1327 p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)1328 switch p.oe.top().DataAtom {1329 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:1330 if strings.Trim(p.tok.Data, whitespace) == "" {1331 p.addText(p.tok.Data)1332 return true1333 }1334 }1335 case StartTagToken:1336 switch p.tok.DataAtom {1337 case a.Caption:1338 p.clearStackToContext(tableScope)1339 p.afe = append(p.afe, &scopeMarker)1340 p.addElement()1341 p.im = inCaptionIM1342 return true1343 case a.Colgroup:1344 p.clearStackToContext(tableScope)1345 p.addElement()1346 p.im = inColumnGroupIM1347 return true1348 case a.Col:1349 p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())1350 return false1351 case a.Tbody, a.Tfoot, a.Thead:1352 p.clearStackToContext(tableScope)1353 p.addElement()1354 p.im = inTableBodyIM1355 return true1356 case a.Td, a.Th, a.Tr:1357 p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())1358 return false1359 case a.Table:1360 if p.popUntil(tableScope, a.Table) {1361 p.resetInsertionMode()1362 return false1363 }1364 // Ignore the token.1365 return true1366 case a.Style, a.Script, a.Template:1367 return inHeadIM(p)1368 case a.Input:1369 for _, t := range p.tok.Attr {1370 if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {1371 p.addElement()1372 p.oe.pop()1373 return true1374 }1375 }1376 // Otherwise drop down to the default action.1377 case a.Form:1378 if p.oe.contains(a.Template) || p.form != nil {1379 // Ignore the token.1380 return true1381 }1382 p.addElement()1383 p.form = p.oe.pop()1384 case a.Select:1385 p.reconstructActiveFormattingElements()1386 switch p.top().DataAtom {1387 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:1388 p.fosterParenting = true1389 }1390 p.addElement()1391 p.fosterParenting = false1392 p.framesetOK = false1393 p.im = inSelectInTableIM1394 return true1395 }1396 case EndTagToken:1397 
switch p.tok.DataAtom {1398 case a.Table:1399 if p.popUntil(tableScope, a.Table) {1400 p.resetInsertionMode()1401 return true1402 }1403 // Ignore the token.1404 return true1405 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:1406 // Ignore the token.1407 return true1408 case a.Template:1409 return inHeadIM(p)1410 }1411 case CommentToken:1412 p.addChild(&Node{1413 Type: CommentNode,1414 Data: p.tok.Data,1415 })1416 return true1417 case DoctypeToken:1418 // Ignore the token.1419 return true1420 case ErrorToken:1421 return inBodyIM(p)1422 }1423 p.fosterParenting = true1424 defer func() { p.fosterParenting = false }()1425 return inBodyIM(p)1426}1427// Section 12.2.6.4.11.1428func inCaptionIM(p *parser) bool {1429 switch p.tok.Type {1430 case StartTagToken:1431 switch p.tok.DataAtom {1432 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:1433 if !p.popUntil(tableScope, a.Caption) {1434 // Ignore the token.1435 return true1436 }1437 p.clearActiveFormattingElements()1438 p.im = inTableIM1439 return false1440 case a.Select:1441 p.reconstructActiveFormattingElements()1442 p.addElement()1443 p.framesetOK = false1444 p.im = inSelectInTableIM1445 return true1446 }1447 case EndTagToken:1448 switch p.tok.DataAtom {1449 case a.Caption:1450 if p.popUntil(tableScope, a.Caption) {1451 p.clearActiveFormattingElements()1452 p.im = inTableIM1453 }1454 return true1455 case a.Table:1456 if !p.popUntil(tableScope, a.Caption) {1457 // Ignore the token.1458 return true1459 }1460 p.clearActiveFormattingElements()1461 p.im = inTableIM1462 return false1463 case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:1464 // Ignore the token.1465 return true1466 }1467 }1468 return inBodyIM(p)1469}1470// Section 12.2.6.4.12.1471func inColumnGroupIM(p *parser) bool {1472 switch p.tok.Type {1473 case TextToken:1474 s := strings.TrimLeft(p.tok.Data, whitespace)1475 if len(s) < len(p.tok.Data) {1476 // Add 
the initial whitespace to the current node.1477 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])1478 if s == "" {1479 return true1480 }1481 p.tok.Data = s1482 }1483 case CommentToken:1484 p.addChild(&Node{1485 Type: CommentNode,1486 Data: p.tok.Data,1487 })1488 return true1489 case DoctypeToken:1490 // Ignore the token.1491 return true1492 case StartTagToken:1493 switch p.tok.DataAtom {1494 case a.Html:1495 return inBodyIM(p)1496 case a.Col:1497 p.addElement()1498 p.oe.pop()1499 p.acknowledgeSelfClosingTag()1500 return true1501 case a.Template:1502 return inHeadIM(p)1503 }1504 case EndTagToken:1505 switch p.tok.DataAtom {1506 case a.Colgroup:1507 if p.oe.top().DataAtom == a.Colgroup {1508 p.oe.pop()1509 p.im = inTableIM1510 }1511 return true1512 case a.Col:1513 // Ignore the token.1514 return true1515 case a.Template:1516 return inHeadIM(p)1517 }1518 case ErrorToken:1519 return inBodyIM(p)1520 }1521 if p.oe.top().DataAtom != a.Colgroup {1522 return true1523 }1524 p.oe.pop()1525 p.im = inTableIM1526 return false1527}1528// Section 12.2.6.4.13.1529func inTableBodyIM(p *parser) bool {1530 switch p.tok.Type {1531 case StartTagToken:1532 switch p.tok.DataAtom {1533 case a.Tr:1534 p.clearStackToContext(tableBodyScope)1535 p.addElement()1536 p.im = inRowIM1537 return true1538 case a.Td, a.Th:1539 p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())1540 return false1541 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:1542 if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {1543 p.im = inTableIM1544 return false1545 }1546 // Ignore the token.1547 return true1548 }1549 case EndTagToken:1550 switch p.tok.DataAtom {1551 case a.Tbody, a.Tfoot, a.Thead:1552 if p.elementInScope(tableScope, p.tok.DataAtom) {1553 p.clearStackToContext(tableBodyScope)1554 p.oe.pop()1555 p.im = inTableIM1556 }1557 return true1558 case a.Table:1559 if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {1560 p.im = inTableIM1561 return false1562 }1563 // Ignore the token.1564 return 
true1565 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:1566 // Ignore the token.1567 return true1568 }1569 case CommentToken:1570 p.addChild(&Node{1571 Type: CommentNode,1572 Data: p.tok.Data,1573 })1574 return true1575 }1576 return inTableIM(p)1577}1578// Section 12.2.6.4.14.1579func inRowIM(p *parser) bool {1580 switch p.tok.Type {1581 case StartTagToken:1582 switch p.tok.DataAtom {1583 case a.Td, a.Th:1584 p.clearStackToContext(tableRowScope)1585 p.addElement()1586 p.afe = append(p.afe, &scopeMarker)1587 p.im = inCellIM1588 return true1589 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:1590 if p.popUntil(tableScope, a.Tr) {1591 p.im = inTableBodyIM1592 return false1593 }1594 // Ignore the token.1595 return true1596 }1597 case EndTagToken:1598 switch p.tok.DataAtom {1599 case a.Tr:1600 if p.popUntil(tableScope, a.Tr) {1601 p.im = inTableBodyIM1602 return true1603 }1604 // Ignore the token.1605 return true1606 case a.Table:1607 if p.popUntil(tableScope, a.Tr) {1608 p.im = inTableBodyIM1609 return false1610 }1611 // Ignore the token.1612 return true1613 case a.Tbody, a.Tfoot, a.Thead:1614 if p.elementInScope(tableScope, p.tok.DataAtom) {1615 p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())1616 return false1617 }1618 // Ignore the token.1619 return true1620 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:1621 // Ignore the token.1622 return true1623 }1624 }1625 return inTableIM(p)1626}1627// Section 12.2.6.4.15.1628func inCellIM(p *parser) bool {1629 switch p.tok.Type {1630 case StartTagToken:1631 switch p.tok.DataAtom {1632 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:1633 if p.popUntil(tableScope, a.Td, a.Th) {1634 // Close the cell and reprocess.1635 p.clearActiveFormattingElements()1636 p.im = inRowIM1637 return false1638 }1639 // Ignore the token.1640 return true1641 case a.Select:1642 p.reconstructActiveFormattingElements()1643 p.addElement()1644 p.framesetOK = 
false1645 p.im = inSelectInTableIM1646 return true1647 }1648 case EndTagToken:1649 switch p.tok.DataAtom {1650 case a.Td, a.Th:1651 if !p.popUntil(tableScope, p.tok.DataAtom) {1652 // Ignore the token.1653 return true1654 }1655 p.clearActiveFormattingElements()1656 p.im = inRowIM1657 return true1658 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:1659 // Ignore the token.1660 return true1661 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:1662 if !p.elementInScope(tableScope, p.tok.DataAtom) {1663 // Ignore the token.1664 return true1665 }1666 // Close the cell and reprocess.1667 if p.popUntil(tableScope, a.Td, a.Th) {1668 p.clearActiveFormattingElements()1669 }1670 p.im = inRowIM1671 return false1672 }1673 }1674 return inBodyIM(p)1675}1676// Section 12.2.6.4.16.1677func inSelectIM(p *parser) bool {1678 switch p.tok.Type {1679 case TextToken:1680 p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))1681 case StartTagToken:1682 switch p.tok.DataAtom {1683 case a.Html:1684 return inBodyIM(p)1685 case a.Option:1686 if p.top().DataAtom == a.Option {1687 p.oe.pop()1688 }1689 p.addElement()1690 case a.Optgroup:1691 if p.top().DataAtom == a.Option {1692 p.oe.pop()1693 }1694 if p.top().DataAtom == a.Optgroup {1695 p.oe.pop()1696 }1697 p.addElement()1698 case a.Select:1699 if !p.popUntil(selectScope, a.Select) {1700 // Ignore the token.1701 return true1702 }1703 p.resetInsertionMode()1704 case a.Input, a.Keygen, a.Textarea:1705 if p.elementInScope(selectScope, a.Select) {1706 p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())1707 return false1708 }1709 // In order to properly ignore <textarea>, we need to change the tokenizer mode.1710 p.tokenizer.NextIsNotRawText()1711 // Ignore the token.1712 return true1713 case a.Script, a.Template:1714 return inHeadIM(p)1715 }1716 case EndTagToken:1717 switch p.tok.DataAtom {1718 case a.Option:1719 if p.top().DataAtom == a.Option {1720 p.oe.pop()1721 }1722 case a.Optgroup:1723 i := len(p.oe) - 11724 if p.oe[i].DataAtom == 
a.Option {1725 i--1726 }1727 if p.oe[i].DataAtom == a.Optgroup {1728 p.oe = p.oe[:i]1729 }1730 case a.Select:1731 if !p.popUntil(selectScope, a.Select) {1732 // Ignore the token.1733 return true1734 }1735 p.resetInsertionMode()1736 case a.Template:1737 return inHeadIM(p)1738 }1739 case CommentToken:1740 p.addChild(&Node{1741 Type: CommentNode,1742 Data: p.tok.Data,1743 })1744 case DoctypeToken:1745 // Ignore the token.1746 return true1747 case ErrorToken:1748 return inBodyIM(p)1749 }1750 return true1751}1752// Section 12.2.6.4.17.1753func inSelectInTableIM(p *parser) bool {1754 switch p.tok.Type {1755 case StartTagToken, EndTagToken:1756 switch p.tok.DataAtom {1757 case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:1758 if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {1759 // Ignore the token.1760 return true1761 }1762 // This is like p.popUntil(selectScope, a.Select), but it also1763 // matches <math select>, not just <select>. Matching the MathML1764 // tag is arguably incorrect (conceptually), but it mimics what1765 // Chromium does.1766 for i := len(p.oe) - 1; i >= 0; i-- {1767 if n := p.oe[i]; n.DataAtom == a.Select {1768 p.oe = p.oe[:i]1769 break1770 }1771 }1772 p.resetInsertionMode()1773 return false1774 }1775 }1776 return inSelectIM(p)...
html_proc.go
Source:html_proc.go
...9 "golang.org/x/net/html/atom"10)11type htmlProcessor struct {12 scopeStack []string13 requireScopeStack bool14 requireDepthStack bool15 depth int16 depthStack []int17 sashimiBuffer strings.Builder18 transformer *transformer19 skipNextText bool20}21func (h *htmlProcessor) transform(reader io.Reader, writer io.Writer, skipSushi bool) error {22 h.sashimiBuffer.Reset()23 h.transformer = &transformer{24 attrMod: make(map[string]string),25 }26 tokenizer := html.NewTokenizer(reader)27 for {28 token := tokenizer.Next()29 switch token {30 case html.ErrorToken:31 err := tokenizer.Err()32 if err == io.EOF {33 return nil34 }35 return err36 case html.CommentToken:37 content := tokenizer.Text()38 if bytes.Contains(content, []byte("sashimi:")) {39 h.sashimiBuffer.Write(content)40 //this is rather cheap but for now enough, it could be determined in the transformer as well41 if bytes.Contains(content, []byte("sashimi:repeat")) || bytes.Contains(content, []byte("sashimi:layout(")) {42 if !h.requireDepthStack {43 h.depthStack = make([]int, 0)44 }45 h.requireDepthStack = true46 h.depthStack = append(h.depthStack, h.depth)47 }48 }49 break50 case html.TextToken:51 if h.skipNextText {52 h.skipNextText = false53 } else {54 content := tokenizer.Raw()55 writer.Write(content)56 }57 break58 case html.SelfClosingTagToken:59 content := tokenizer.Raw()60 writer.Write(content)61 break62 case html.StartTagToken:63 h.depth++64 if h.sashimiBuffer.Len() > 0 {65 content := tokenizer.Token()66 is := antlr.NewInputStream(h.sashimiBuffer.String())67 lexer := NewSashimiLexer(is)68 h.sashimiBuffer.Reset()69 stream := antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)70 p := NewSashimiParser(stream)71 antlr.ParseTreeWalkerDefault.Walk(h.transformer, p.Block())72 attrMods := h.transformer.FlushAttributeModifiers()73 for k, v := range attrMods {74 found := false75 for i, attr := range content.Attr {76 if attr.Key == k {77 content.Attr[i] = html.Attribute{78 Key: k,79 Val: v,80 }81 found = true82 
break83 }84 }85 if !found {86 content.Attr = append(content.Attr, html.Attribute{87 Key: k,88 Val: v,89 })90 }91 }92 pre, post := h.transformer.FlushBuffer()93 if len(pre) > 0 {94 writer.Write([]byte(pre))95 }96 writer.Write([]byte(content.String()))97 if len(post) > 0 {98 writer.Write([]byte(post))99 h.skipNextText = true100 }101 } else {102 content := tokenizer.Raw()103 writer.Write(content)104 }105 break106 case html.EndTagToken:107 h.depth--108 content := tokenizer.Token()109 if !skipSushi && content.DataAtom == atom.Html {110 writer.Write([]byte("<!--ð£-->"))111 }112 writer.Write([]byte(content.String()))113 if h.requireDepthStack {114 n := len(h.depthStack) - 1115 if n >= 0 {116 for n >= 0 && h.depthStack[n] == h.depth {117 h.depthStack = h.depthStack[:n]118 writer.Write([]byte("{{end}}"))119 n = len(h.depthStack) - 1120 }121 } else {122 h.requireDepthStack = false123 }124 }125 break126 case html.DoctypeToken:127 content := tokenizer.Raw()128 writer.Write(content)129 break130 }131 }132}133//extractFromHTML extracts all sashimi directives from the HTML (basically creates a .sushi files from .html)134func (h *htmlProcessor) extractFromHTML(reader io.Reader, writer io.Writer) error {135 tokenizer := html.NewTokenizer(reader)136 for {137 token := tokenizer.Next()138 if token == html.ErrorToken {139 err := tokenizer.Err()140 if err == io.EOF {141 return nil142 }143 return err144 }145 if token == html.StartTagToken {146 if h.requireScopeStack {147 offset := len(tokenizer.Raw())148 name, _ := tokenizer.TagName()149 scope := fmt.Sprintf("%s:%v", name, offset)150 if len(h.scopeStack) > 0 {151 n := len(h.scopeStack) - 1152 innerScope := fmt.Sprintf("%s::%s", h.scopeStack[n], scope)153 h.scopeStack = append(h.scopeStack, innerScope)154 implicitDirective := fmt.Sprintf(" sashimi:begin('%s') ", innerScope)155 _, err := writer.Write([]byte(implicitDirective))156 if err != nil {157 return err158 }159 } else {160 h.scopeStack = append(h.scopeStack, scope)161 
implicitDirective := fmt.Sprintf(" sashimi:begin('%s') ", scope)162 _, err := writer.Write([]byte(implicitDirective))163 if err != nil {164 return err165 }166 }167 }168 }169 if token == html.EndTagToken {170 if h.requireScopeStack {171 n := len(h.scopeStack) - 1172 implicitDirective := fmt.Sprintf(" sashimi:end('%s') ", h.scopeStack[n])173 _, err := writer.Write([]byte(implicitDirective))174 if err != nil {175 return err176 }177 h.scopeStack = h.scopeStack[:n]178 if len(h.scopeStack) == 0 {179 h.requireScopeStack = false180 }181 }182 }183 if token == html.CommentToken {184 content := tokenizer.Text()185 if bytes.Contains(content, []byte("sashimi:")) {186 if bytes.Contains(content, []byte("sashimi:repeat")) {187 if !h.requireScopeStack {188 h.scopeStack = make([]string, 0)189 }190 h.requireScopeStack = true191 }192 _, err := writer.Write(content)193 if err != nil {194 return err195 }196 }197 }198 }199}...
Scope
Using AI Code Generation
1import (2func main() {3 doc, err := html.Parse(os.Stdin)4 if err != nil {5 fmt.Fprintf(os.Stderr, "findlinks1: %v\n", err)6 os.Exit(1)7 }8 for _, link := range visit(nil, doc) {9 fmt.Println(link)10 }11}12func visit(links []string, n *html.Node) []string {13 if n.Type == html.ElementNode && n.Data == "a" {14 for _, a := range n.Attr {15 if a.Key == "href" {16 links = append(links, a.Val)17 }18 }19 }20 for c := n.FirstChild; c != nil; c = c.NextSibling {21 links = visit(links, c)22 }23}
Scope
Using AI Code Generation
1import (2func main() {3 doc, err := html.Parse(os.Stdin)4 if err != nil {5 fmt.Fprintf(os.Stderr, "findlinks1: %v\n", err)6 os.Exit(1)7 }8 for _, link := range visit(nil, doc) {9 fmt.Println(link)10 }11}12func visit(links []string, n *html.Node) []string {13 if n.Type == html.ElementNode && n.Data == "a" {14 for _, a := range n.Attr {15 if a.Key == "href" {16 links = append(links, a.Val)17 }18 }19 }20 for c := n.FirstChild; c != nil; c = c.NextSibling {21 links = visit(links, c)22 }23}24import (25func main() {26 doc, err := html.Parse(os.Stdin)27 if err != nil {28 fmt.Fprintf(os.Stderr, "findlinks1: %v\n", err)29 os.Exit(1)30 }31 for _, link := range visit(nil, doc) {32 fmt.Println(link)33 }34}35func visit(links []string, n *html.Node) []string {36 if n.Type == html.ElementNode && n.Data == "a" {37 for _, a := range n.Attr {38 if a.Key == "href" {39 links = append(links, a.Val)40 }41 }42 }43 for c := n.FirstChild; c != nil; c = c.NextSibling {44 links = visit(links, c)45 }46}47import (48func main() {49 doc, err := html.Parse(os.Stdin)50 if err != nil {51 fmt.Fprintf(os.Stderr, "findlinks1: %v\n", err)52 os.Exit(1)53 }54 for _, link := range visit(nil, doc) {55 fmt.Println(link)56 }57}58func visit(links
Scope
Using AI Code Generation
1import (2func main() {3 doc, err := html.Parse(os.Stdin)4 if err != nil {5 fmt.Fprintf(os.Stderr, "findlinks1: %v\n", err)6 os.Exit(1)7 }8 for _, link := range visit(nil, doc) {9 fmt.Println(link)10 }11}12func visit(links []string, n *html.Node) []string {13 if n.Type == html.ElementNode && n.Data == "a" {14 for _, a := range n.Attr {15 if a.Key == "href" {16 links = append(links, a.Val)17 }18 }19 }20 for c := n.FirstChild; c != nil; c = c.NextSibling {21 links = visit(links, c)22 }23}
Scope
Using AI Code Generation
1import (2func main() {3 if err != nil {4 log.Fatal(err)5 }6 defer resp.Body.Close()7 doc, err := html.Parse(resp.Body)8 if err != nil {9 log.Fatal(err)10 }11 links := visit(nil, doc)12 for _, link := range links {13 fmt.Println(link)14 }15}16func visit(links []string, n *html.Node) []string {17 if n.Type == html.ElementNode && n.Data == "a" {18 for _, a := range n.Attr {19 if a.Key == "href" {20 links = append(links, a.Val)21 }22 }23 }24 for c := n.FirstChild; c != nil; c = c.NextSibling {25 links = visit(links, c)26 }27}
Scope
Using AI Code Generation
1import (2func main() {3 if err != nil {4 fmt.Println("Error in getting http response")5 }6 defer resp.Body.Close()7 body, err := ioutil.ReadAll(resp.Body)8 if err != nil {9 fmt.Println("Error in reading http response body")10 }11 doc, err := html.Parse(strings.NewReader(string(body)))12 if err != nil {13 fmt.Println("Error in parsing http response body")14 }15 links := visit(nil, doc)16 for _, link := range links {17 fmt.Println(link)18 }19}20func visit(links []string, n *html.Node) []string {21 if n.Type == html.ElementNode && n.Data == "a" {22 for _, a := range n.Attr {23 if a.Key == "href" {24 links = append(links, a.Val)25 }26 }27 }28 for c := n.FirstChild; c != nil; c = c.NextSibling {29 links = visit(links, c)30 }31}32import (33func main() {34 if err != nil {35 fmt.Println("Error in getting http response")36 }37 defer resp.Body.Close()38 body, err := ioutil.ReadAll(resp.Body)39 if err != nil {40 fmt.Println("Error in reading http response body")41 }42 doc, err := html.Parse(strings.NewReader(string(body)))43 if err != nil {44 fmt.Println("Error in parsing http response body")45 }46 links := visit(nil, doc)47 for _, link := range links {48 fmt.Println(link)49 }50}51func visit(links []string, n *html.Node) []string {52 if n.Type == html.ElementNode && n.Data == "a" {53 for _, a := range n.Attr {54 if a.Key == "href" {55 links = append(links, a.Val)56 }57 }58 }59 for c := n.FirstChild; c != nil; c = c.NextSibling {60 links = visit(links
Scope
Using AI Code Generation
1import (2func main() {3 for _, url := range os.Args[1:] {4 doc, err := html.Parse(get(url))5 if err != nil {6 fmt.Fprintf(os.Stderr, "findlinks1: %v\n", err)7 os.Exit(1)8 }9 for _, link := range visit(nil, doc) {10 fmt.Println(link)11 }12 }13}14func get(url string) *http.Response {15 resp, err := http.Get(url)16 if err != nil {17 fmt.Fprintf(os.Stderr, "findlinks1: %v\n", err)18 os.Exit(1)19 }20}21func visit(links []string, n *html.Node) []string {22 if n.Type == html.ElementNode && n.Data == "a" && n.FirstChild != nil {23 links = append(links, n.FirstChild.Data)24 }25 for c := n.FirstChild; c != nil; c = c.NextSibling {26 links = visit(links, c)27 }28}
Scope
Using AI Code Generation
1import (2func main() {3 if err != nil {4 log.Fatal(err)5 }6 doc.Find("table").Each(func(index int, item *goquery.Selection) {7 fmt.Println(item.Text())8 })9}
Scope
Using AI Code Generation
1import (2func main() {3 body, _ := ioutil.ReadAll(resp.Body)4 doc, _ := html.Parse(resp.Body)5 fmt.Println(doc)6 fmt.Println(body)7 fmt.Println(resp)8}
Scope
Using AI Code Generation
1import (2func main() {3 doc, err := html.Parse(strings.NewReader("<html><body><h1>Hello, World</h1></body></html>"))4 if err != nil {5 log.Fatal(err)6 }7 forEachNode(doc, startElement, endElement)8}9func forEachNode(n *html.Node, pre, post func(n *html.Node)) {10 if pre != nil {11 pre(n)12 }13 for c := n.FirstChild; c != nil; c = c.NextSibling {14 forEachNode(c, pre, post)15 }16 if post != nil {17 post(n)18 }19}20func startElement(n *html.Node) {21 if n.Type == html.ElementNode {22 fmt.Printf("<%s>\n", n.Data)23 }24}25func endElement(n *html.Node) {26 if n.Type == html.ElementNode {27 fmt.Printf("</%s>\n", n.Data)28 }29}30import (31func main() {32 doc, err := html.Parse(strings.NewReader("<html><body><h1>Hello, World</h1></body></html>"))33 if err != nil {34 log.Fatal(err)35 }36 forEachNode(doc, startElement, endElement)37}38func forEachNode(n *html.Node, pre, post func(n *html.Node)) {39 if pre != nil {40 pre(n)41 }42 for c := n.FirstChild; c != nil; c = c.NextSibling {43 forEachNode(c, pre, post)44 }45 if post != nil {46 post(n)47 }48}49func startElement(n *html.Node) {50 if n.Type == html.ElementNode {51 fmt.Printf("<%s", n.Data)52 for _, a := range n.Attr {53 fmt.Printf(" %s='%s'", a.Key, a.Val)54 }55 fmt.Printf(">\n")56 }57}58func endElement(n *html.Node) {59 if n.Type == html.ElementNode {60 fmt.Printf("</%s>\n", n.Data)61 }62}63import (
Scope
Using AI Code Generation
1import (2func main() {3 doc, err := html.Parse(strings.NewReader(htmlCode))4 if err != nil {5 panic(err)6 }7 tables := findElements(doc, "table", nil)8 th := findElements(tables[0], "th", nil)9 td := findElements(tables[0], "td", nil)10 tr := findElements(tables[0], "tr", nil)11 tbody := findElements(tables[0], "tbody", nil)12 thead := findElements(tables[0], "thead", nil)13 tfoot := findElements(tables[0], "tfoot", nil)14 caption := findElements(tables[0], "caption", nil)15 fmt.Println("th scope")16 for _, v := range th {17 fmt.Println(v.Data
Learn to execute automation testing from scratch with LambdaTest Learning Hub: from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!