How to use ParseHTML method of html Package

Best K6 code snippet using html.ParseHTML

parsehtml.go

Source:parsehtml.go Github

copy

Full Screen

1package parsehtml2import (3 "bufio"4 "io/ioutil"5 "os"6 "regexp"7 "strconv"8 "strings"9 "github.com/MaestroError/html-strings-affixer/config"10 "golang.org/x/exp/slices"11)12type Parsehtml struct {13 file string14 found_strings map[string][]map[string]string15 content string16 original_content string17 // options18 ignore_characters []string19 extractions []string20 // Affixes to search string21 prefix string22 suffix string23 // regex24 regexp *regexp.Regexp25 search_regex string26}27/*28*29 */30func (parse *Parsehtml) Init(file string, c config.Config) {31 parse.found_strings = make(map[string][]map[string]string)32 parse.SetFile(file)33 parse.getFileContent()34 // set options from config35 parse.setIgnoreCharacters(c.GetIgnoreCharacters())36 parse.setExtractions(c.GetAllowedMethods())37}38func (parse *Parsehtml) ParseFile(file string, c config.Config) *Parsehtml {39 parse.Init(file, c)40 if slices.Contains(parse.extractions, "text") {41 parse.ExtractText()42 }43 if slices.Contains(parse.extractions, "placeholder") {44 parse.ExtractPlaceholder()45 }46 if slices.Contains(parse.extractions, "alt") {47 parse.ExtractAlt()48 }49 if slices.Contains(parse.extractions, "title") {50 parse.ExtractTitle()51 }52 if slices.Contains(parse.extractions, "hastag") {53 parse.ExtractHashtag()54 }55 return parse56}57// setters58func (parse *Parsehtml) SetFile(file string) {59 parse.file = file60}61// Adds new string in found_strings62// sets trimmed string as "found" and original string as "original_string"63// type -> string describing type of visible html, you can specify it while calling parse.parseContent method64// lines -> lines where found string exists, you can get it with parse.findLineOfString method65func (parse *Parsehtml) AddNewString(found string, original_string string, found_type string, lines string) {66 foundObject := make(map[string]string)67 foundObject["found"] = found68 foundObject["original_string"] = original_string69 foundObject["type"] = found_type70 
foundObject["lines"] = lines71 parse.found_strings["data"] = append(parse.found_strings["data"], foundObject)72}73func (parse *Parsehtml) GetFoundStrings() map[string][]map[string]string {74 return parse.found_strings75}76func (parse *Parsehtml) AddIgnoreCharacter(char string) {77 parse.ignore_characters = append(parse.ignore_characters, char)78}79func (parse *Parsehtml) SetPrefix(prefix string) {80 parse.prefix = prefix81}82func (parse *Parsehtml) SetSuffix(suffix string) {83 parse.suffix = suffix84}85// Simple strings extraction method - just plain strings in HTML86func (parse *Parsehtml) ExtractText() {87 // set affixes for simple strings extraction88 parse.SetPrefix("\\>")89 parse.SetSuffix("\\<")90 // Generates regex based on prefix, suffix and denied characters91 parse.generateRegex()92 // Parses content and adds strings in found_strings with specific type93 parse.parseContent("text")94}95// HTML input's Placeholders attributes extraction method96// XX - Can't use word "placeholder" inside placeholder - XX ?? why? 
it does well97func (parse *Parsehtml) ExtractPlaceholder() {98 // set affixes for simple strings extraction99 // (?i) = case insensitive100 parse.SetPrefix("(?i)placeholder=(\"|')")101 parse.SetSuffix("(\"|')")102 // Generates regex based on prefix, suffix and denied characters103 parse.generateRegex()104 // Parses content and adds strings in found_strings with specific type105 parse.parseContent("placeholder")106}107// HTML img's alt attributes extraction method108func (parse *Parsehtml) ExtractAlt() {109 // set affixes for simple strings extraction110 parse.SetPrefix("(?i)alt=(\"|')")111 parse.SetSuffix("(\"|')")112 // Generates regex based on prefix, suffix and denied characters113 parse.generateRegex()114 // Parses content and adds strings in found_strings with specific type115 parse.parseContent("alt")116}117// HTML title attributes extraction method118func (parse *Parsehtml) ExtractTitle() {119 // set affixes for simple strings extraction120 parse.SetPrefix("(?i)title=(\"|')")121 parse.SetSuffix("(\"|')")122 // Generates regex based on prefix, suffix and denied characters123 parse.generateRegex()124 // Parses content and adds strings in found_strings with specific type125 parse.parseContent("title")126}127// Extracts "#text" type (selected) strings128func (parse *Parsehtml) ExtractHashtag() {129 // set affixes for simple strings extraction130 parse.SetPrefix("(\"|'|>)\\s*#")131 parse.SetSuffix("(\"|'|<)")132 // Generates regex based on prefix, suffix and denied characters133 parse.generateRegex()134 // Parses content and adds strings in found_strings with specific type135 // @todo add "#" as strip to remove it while replacing136 parse.parseContent("hashtag")137}138// privates139func (parse *Parsehtml) setFoundStrings(found_strings map[string][]map[string]string) {140 parse.found_strings = found_strings141}142func (parse *Parsehtml) renewContent() {143 parse.content = parse.original_content144}145func (parse *Parsehtml) findLineOfString(str string) []string 
{146 f, err := os.Open(parse.file)147 if err != nil {148 // return 0, err149 panic(err)150 }151 defer f.Close()152 // Splits on newlines by default.153 scanner := bufio.NewScanner(f)154 foundOnLines := []string{}155 line := 1156 // check each line for founded string existence157 for scanner.Scan() {158 if strings.Contains(scanner.Text(), str) {159 // append line as string in foundOnLines array160 foundOnLines = append(foundOnLines, strconv.Itoa(line))161 }162 line++163 }164 if err := scanner.Err(); err != nil {165 // Handle the error166 panic(err)167 }168 return foundOnLines169}170// Reads file and sets content (as content and original_content properties)171func (parse *Parsehtml) getFileContent() {172 var r []byte173 var err error174 r, err = ioutil.ReadFile(parse.file)175 if err != nil {176 panic(err)177 }178 content := string(r)179 parse.content = content180 parse.original_content = content181}182func (parse *Parsehtml) setIgnoreCharacters(ignore_characters []string) {183 parse.ignore_characters = ignore_characters184}185func (parse *Parsehtml) setExtractions(allowed_parse_methods []string) {186 parse.extractions = allowed_parse_methods187}188// Generates regex based on prefix, suffix and denied characters189// sets search_regex as regular expression string190// and regexp as regexp object191func (parse *Parsehtml) generateRegex() {192 if parse.prefix != "" && parse.suffix != "" {193 deniedCharString := strings.Join(parse.ignore_characters, "\\")194 // [^\s+] -> used to not match whitespace195 reg := regexp.MustCompile(parse.prefix + `[^` + deniedCharString + `].[^\s+][^` + deniedCharString + `]+` + parse.suffix)196 parse.search_regex = reg.String()197 parse.regexp = reg198 }199}200// parses content, trims found strings and adds in found_strings if not already exists201func (parse *Parsehtml) parseContent(htmlType string) {202 // find all strings based on regex203 submatchall := parse.regexp.FindAllString(parse.content, -1)204 for _, element := range submatchall 
{205 // removes (trims) finding prefix and suffix206 re := regexp.MustCompile(parse.prefix)207 found := re.ReplaceAllString(element, "")208 re = regexp.MustCompile(parse.suffix)209 found = re.ReplaceAllString(found, "")210 // add as new string if no duplicates found211 if !parse.checkDuplicate(found) {212 lines := parse.findLineOfString(found)213 parse.AddNewString(found, element, htmlType, strings.Join(lines, ", "))214 }215 }216}217// check if string already exists in found strings218func (parse *Parsehtml) checkDuplicate(found string) bool {219 result := false220 // @todo check also type of string or "original_string" (maybe some string will need different methods to replace)221 for _, fs := range parse.found_strings[parse.file] {222 if fs["found"] == found {223 result = true224 break225 }226 }227 return result228}...

Full Screen

Full Screen

page_test.go

Source:page_test.go Github

copy

Full Screen

...11 panic(fmt.Sprintf("can't open test data: %v", err))12 }13 return f14}15func TestParseHTML(t *testing.T) {16 baseURL, err := SanitizedURLFromString("http://www.example.com/a/b/c")17 if err != nil {18 panic(fmt.Sprintf("can't build sanitized url"))19 }20 t.Run("一般的なHTMLの場合", func(t *testing.T) {21 html, err := ParseHTML(openTestData("testdata/test.html"), baseURL)22 if err != nil {23 t.Errorf("ParseHTML(testdata/test.html) = error, want = no error")24 return25 }26 if html.Title() != "テスト用HTML" {27 t.Errorf("ParseHTML(testdata/test.html).Title() = %s, want = \"テスト用HTML\"", html.Title())28 }29 if !html.NoIndex() {30 t.Errorf("ParseHTML(testdata/test.html).NoIndex() = false, want = true")31 }32 if len(html.AllURL()) != 3 {33 t.Errorf("len(ParseHTML(testdata/test.html).AllURL()) = %d, want = 3", len(html.AllURL()))34 }35 wantURL := []string{"http://example1.com", "https://example2.com", "http://www.example.com/a/b/rel.html"}36 for i, want := range wantURL {37 if html.AllURL()[i].String() != want {38 t.Errorf("ParseHTML(testdata/test.html).AllURL()[%d] = %s, want = %s", i, html.AllURL()[i].String(), want)39 }40 }41 })42 t.Run("nofollowが全面的に指定されているHTMLの場合", func(t *testing.T) {43 html, err := ParseHTML(openTestData("testdata/nofollow.html"), baseURL)44 if err != nil {45 t.Errorf("ParseHTML(testdata/nofollow.html) = error, want = no error")46 return47 }48 if html.Title() != "テスト用HTML" {49 t.Errorf("ParseHTML(testdata/nofollow.html).Title() = %s, want = \"テスト用HTML\"", html.Title())50 }51 if html.NoIndex() {52 t.Errorf("ParseHTML(testdata/nofollow.html).NoIndex() = true, want = false")53 }54 if len(html.AllURL()) != 0 {55 t.Errorf("len(ParseHTML(testdata/nofollow.html).AllURL()) = %d, want = 0", len(html.AllURL()))56 }57 })58}...

Full Screen

Full Screen

parsekind_yamlenums.go

Source:parsekind_yamlenums.go Github

copy

Full Screen

...6)7var (8 _ParseKindNameToValue = map[string]ParseKind{9 "ParseRaw": ParseRaw,10 "ParseHTML": ParseHTML,11 "ParseMarkdown": ParseMarkdown,12 }13 _ParseKindValueToName = map[ParseKind]string{14 ParseRaw: "ParseRaw",15 ParseHTML: "ParseHTML",16 ParseMarkdown: "ParseMarkdown",17 }18)19func init() {20 var v ParseKind21 if _, ok := interface{}(v).(fmt.Stringer); ok {22 _ParseKindNameToValue = map[string]ParseKind{23 interface{}(ParseRaw).(fmt.Stringer).String(): ParseRaw,24 interface{}(ParseHTML).(fmt.Stringer).String(): ParseHTML,25 interface{}(ParseMarkdown).(fmt.Stringer).String(): ParseMarkdown,26 }27 }28}29// MarshalYAML is generated so ParseKind satisfies yaml.Marshaler.30func (r ParseKind) MarshalYAML() ([]byte, error) {31 if s, ok := interface{}(r).(fmt.Stringer); ok {32 return yaml.Marshal(s.String())33 }34 s, ok := _ParseKindValueToName[r]35 if !ok {36 return nil, fmt.Errorf("invalid ParseKind: %d", r)37 }38 return yaml.Marshal(s)...

Full Screen

Full Screen

ParseHTML

Using AI Code Generation

copy

Full Screen

1import (2func main() {3 if err != nil {4 fmt.Println("Error in loading URL")5 }6}

Full Screen

Full Screen

ParseHTML

Using AI Code Generation

copy

Full Screen

1import (2func main() {3 z := html.NewTokenizer(res.Body)4 for {5 tt := z.Next()6 switch {7 t := z.Token()8 if isAnchor {9 for _, a := range t.Attr {10 if a.Key == "href" {11 fmt.Printf("Link: %q12 }13 }14 }15 }16 }17}

Full Screen

Full Screen

ParseHTML

Using AI Code Generation

copy

Full Screen

1import (2func main() {3 if err != nil {4 log.Fatal(err)5 }6 defer resp.Body.Close()7 doc, err := html.Parse(resp.Body)8 if err != nil {9 log.Fatal(err)10 }11 var f func(*html.Node)12 f = func(n *html.Node) {13 if n.Type == html.ElementNode {14 fmt.Println(n.Data)15 }16 for c := n.FirstChild; c != nil; c = c.NextSibling {17 f(c)18 }19 }20 f(doc)21}

Full Screen

Full Screen

ParseHTML

Using AI Code Generation

copy

Full Screen

1import (2func main() {3 doc, err := html.Parse(strings.NewReader("<html><head></head><body><h1>Hi</h1></body></html>"))4 if err != nil {5 fmt.Println("Error in parsing the HTML")6 }7 fmt.Println(doc)8}9&{0xc00000c0c0 [] []}10import (11func main() {12 doc, err := html.ParseFragment(strings.NewReader("<h1>Hi</h1>"), &html.Node{Type: html.ElementNode, Data: "body"})13 if err != nil {14 fmt.Println("Error in parsing the HTML")15 }16 fmt.Println(doc)17}18[&{0xc00000c0c0 [] []}]19import (20func main() {21 doc, err := html.Parse(strings.NewReader("<html><head></head><body><h1>Hi</h1></body></html

Full Screen

Full Screen

ParseHTML

Using AI Code Generation

copy

Full Screen

1import (2func main() {3 doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))4 if err != nil {5 log.Fatal(err)6 }7 fmt.Println(doc.Find("h1").Text())8}9import (10func main() {11 doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))12 if err != nil {13 log.Fatal(err)14 }15 fmt.Println(doc.Find("h1").Text())16}17import (18func main() {19 if err != nil {20 log.Fatal(err)21 }22 fmt.Println(doc.Find("h1").Text())23}24import (25func main() {26 doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))27 if err != nil {28 log.Fatal(err)29 }30 fmt.Println(doc.Find("h1").Text())31}32import (33func main() {34 if err != nil {35 log.Fatal(err)36 }37 fmt.Println(doc.Find("h1").Text())38}

Full Screen

Full Screen

ParseHTML

Using AI Code Generation

copy

Full Screen

1import (2func main() {3 if err != nil {4 panic(err)5 }6}7import (8func main() {9 if err != nil {10 panic(err)11 }12}13import (14func main() {15 if err != nil {16 panic(err)17 }18 fmt.Println(htmlquery.InnerText(html))19}20import (21func main() {22 if err != nil {23 panic(err)24 }25}26import (27func main() {28 if err != nil {29 panic(err)30 }31}32import (33func main() {34 if err != nil {35 panic(err)36 }37}38import (

Full Screen

Full Screen

ParseHTML

Using AI Code Generation

copy

Full Screen

1import "fmt"2import "github.com/antchfx/htmlquery"3func main() {4 if err != nil {5 panic(err)6 }7 for _, n := range nodes {8 fmt.Println(htmlquery.InnerText(n))9 }10}

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, such as Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run K6 automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Most used method in

Try LambdaTest Now !!

Get 100 automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful