Skip to content

Commit

Permalink
Merge branch 'master' of github.com:MontFerret/ferret
Browse files Browse the repository at this point in the history
  • Loading branch information
ziflex committed Sep 2, 2021
2 parents aeb1247 + e6dd568 commit 86d5e45
Show file tree
Hide file tree
Showing 58 changed files with 2,745 additions and 1,498 deletions.
158 changes: 133 additions & 25 deletions e2e/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@ import (
"encoding/json"
"flag"
"fmt"
"github.com/rs/zerolog"
"io/ioutil"
"os"
"path/filepath"
"strings"

"github.com/MontFerret/ferret"
"github.com/MontFerret/ferret/pkg/drivers"
"github.com/MontFerret/ferret/pkg/drivers/cdp"
"github.com/MontFerret/ferret/pkg/drivers/http"
"github.com/MontFerret/ferret/pkg/runtime"
"github.com/MontFerret/ferret/pkg/runtime/core"
"github.com/MontFerret/ferret/pkg/runtime/logging"
)

type Params []string
Expand Down Expand Up @@ -61,8 +63,16 @@ var (
"",
"set CDP address",
)

logLevel = flag.String(
"log-level",
logging.ErrorLevel.String(),
"log level",
)
)

// logger is the package-level logger used by the CLI itself. main assigns it
// after parsing flags, with the level taken from the -log-level flag.
var logger zerolog.Logger

func main() {
var params Params

Expand All @@ -74,10 +84,21 @@ func main() {

flag.Parse()

var query string
console := zerolog.ConsoleWriter{
Out: os.Stderr,
TimeFormat: "15:04:05.999",
}
logger = zerolog.New(console).
Level(zerolog.Level(logging.MustParseLevel(*logLevel))).
With().
Timestamp().
Logger()

stat, _ := os.Stdin.Stat()

var query string
var files []string

if (stat.Mode() & os.ModeCharDevice) == 0 {
// check whether the app is getting a query via standard input
std := bufio.NewReader(os.Stdin)
Expand All @@ -91,18 +112,10 @@ func main() {

query = string(b)
} else if flag.NArg() > 0 {
// backward compatibility
content, err := ioutil.ReadFile(flag.Arg(0))

if err != nil {
fmt.Println(err)
os.Exit(1)
}

query = string(content)
files = flag.Args()
} else {
fmt.Println(flag.NArg())
fmt.Println("Missed file")
fmt.Println("File or input stream are required")
os.Exit(1)
}

Expand All @@ -113,26 +126,121 @@ func main() {
os.Exit(1)
}

if err := execFile(query, p); err != nil {
engine := ferret.New()
_ = engine.Drivers().Register(http.NewDriver())
_ = engine.Drivers().Register(cdp.NewDriver(cdp.WithAddress(*conn)))

opts := []runtime.Option{
runtime.WithParams(p),
runtime.WithLog(console),
runtime.WithLogLevel(logging.MustParseLevel(*logLevel)),
}

if query != "" {
err = execQuery(engine, opts, query)
} else {
err = execFiles(engine, opts, files)
}

if err != nil {
fmt.Println(err)
os.Exit(1)
}
}

func execFile(query string, params map[string]interface{}) error {
ctx := drivers.WithContext(
context.Background(),
http.NewDriver(),
drivers.AsDefault(),
)
// execFiles executes every path in files against engine, passing opts to each
// execution.
//
// A path may point to a regular file, whose content is executed as a query, or
// to a directory, in which case the entries directly inside it that have the
// ".fql" extension are executed through a nested execFiles call. A failure on
// one path does not stop the remaining paths from being processed: errors are
// collected and returned together via core.Errors. It returns nil when every
// path was processed without error.
func execFiles(engine *ferret.Instance, opts []runtime.Option, files []string) error {
	errList := make([]error, 0, len(files))

	for _, path := range files {
		log := logger.With().Str("path", path).Logger()
		log.Debug().Msg("checking path...")

		info, err := os.Stat(path)

		if err != nil {
			log.Debug().Err(err).Msg("failed to get path info")

			errList = append(errList, err)
			continue
		}

		if info.IsDir() {
			log.Debug().Msg("path points to a directory. retrieving list of files...")

			fileInfos, readErr := ioutil.ReadDir(path)

			if readErr != nil {
				log.Debug().Err(readErr).Msg("failed to retrieve list of files")

				errList = append(errList, readErr)
				continue
			}

			log.Debug().Int("size", len(fileInfos)).Msg("retrieved list of files. starting to iterate...")

			// Only direct children with the .fql extension are picked up.
			dirFiles := make([]string, 0, len(fileInfos))

			for _, entry := range fileInfos {
				if filepath.Ext(entry.Name()) == ".fql" {
					dirFiles = append(dirFiles, filepath.Join(path, entry.Name()))
				}
			}

			if len(dirFiles) == 0 {
				log.Debug().Int("size", len(fileInfos)).Msg("no FQL files found")
				continue
			}

			// The nested call aggregates its own failures, so a directory
			// contributes at most one entry to errList.
			if dirErr := execFiles(engine, opts, dirFiles); dirErr != nil {
				log.Debug().Err(dirErr).Msg("failed to execute files")

				errList = append(errList, dirErr)
			} else {
				log.Debug().Int("size", len(fileInfos)).Msg("successfully executed files")
			}

			continue
		}

		log.Debug().Msg("path points to a file. starting to read content")

		out, err := ioutil.ReadFile(path)

		if err != nil {
			log.Debug().Err(err).Msg("failed to read content")

			errList = append(errList, err)
			continue
		}

		log.Debug().Msg("successfully read file")
		log.Debug().Msg("executing file...")

		if err := execQuery(engine, opts, string(out)); err != nil {
			log.Debug().Err(err).Msg("failed to execute file")

			errList = append(errList, err)
			continue
		}

		log.Debug().Msg("successfully executed file")
	}

	if len(errList) > 0 {
		// Distinguish "every path failed" from a partial failure in the log;
		// the returned error is the same aggregate in both cases.
		if len(errList) == len(files) {
			logger.Debug().Errs("errors", errList).Msg("failed to execute file(s)")
		} else {
			logger.Debug().Errs("errors", errList).Msg("executed with errors")
		}

		return core.Errors(errList...)
	}

	return nil
}

i := ferret.New()
out, err := i.Exec(ctx, query, runtime.WithParams(params))
func execQuery(engine *ferret.Instance, opts []runtime.Option, query string) error {
out, err := engine.Exec(context.Background(), query, opts...)

if err != nil {
return err
Expand Down
2 changes: 1 addition & 1 deletion e2e/tests/dynamic/doc/click/click.fql
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ CLICK(page, "#wait-class-random-btn")

WAIT_CLASS(page, "#wait-class-random-content", "alert-success")

RETURN ""
RETURN TRUE
9 changes: 5 additions & 4 deletions e2e/tests/dynamic/element/wait/style.fql
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,19 @@ LET doc = DOCUMENT(url, true)
WAIT_ELEMENT(doc, "#page-events")

LET el = ELEMENT(doc, "#wait-class-content")
LET original = el.style.color

ATTR_SET(el, "style", "color: black")
WAIT_STYLE(el, "color", "black")
WAIT_STYLE(el, "color", "rgb(0, 0, 0)")

LET prev = el.style

ATTR_REMOVE(el, "style")
WAIT_STYLE(el, "color", NONE)
WAIT_STYLE(el, "color", original)

LET curr = el.style

T::EQ(prev.color, "black")
T::NONE(curr.color, "style should be removed")
T::EQ(prev.color, "rgb(0, 0, 0)")
T::EQ(curr.color, original, "style should be returned to original")

RETURN NONE
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
12 changes: 5 additions & 7 deletions examples/click.fql
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
LET doc = DOCUMENT("https://github.com/", { driver: "cdp" })
LET doc = DOCUMENT("https://www.montferret.dev/", { driver: "cdp" })

HOVER(doc, ".HeaderMenu-details")
CLICK(doc, ".HeaderMenu a")
CLICK(doc, "#repl")

WAIT_NAVIGATION(doc)
WAIT_ELEMENT(doc, 'main nav')
WAITFOR EVENT "navigation" IN doc
WAIT_ELEMENT(doc, '.code-editor-text')

FOR el IN ELEMENTS(doc, 'main nav a')
RETURN TRIM(el.innerText)
RETURN doc.url
15 changes: 13 additions & 2 deletions examples/crawler.fql
Original file line number Diff line number Diff line change
@@ -1,15 +1,26 @@
LET doc = DOCUMENT('https://www.theverge.com/tech', {
driver: "cdp"
driver: "cdp",
ignore: {
resources: [
{
url: "*",
type: "image"
}
]
}
})

WAIT_ELEMENT(doc, '.c-compact-river__entry', 5000)
LET articles = ELEMENTS(doc, '.c-entry-box--compact__image-wrapper')
LET links = (
FOR article IN articles
FILTER article.attributes?.href LIKE 'https://www.theverge.com/*'
RETURN article.attributes.href
)

FOR link IN links
// The Verge has pretty heavy pages, so let's increase the navigation wait time
NAVIGATE(doc, link, 20000)
WAIT_ELEMENT(doc, '.c-entry-content', 5000)
WAIT_ELEMENT(doc, '.c-entry-content', 15000)
LET texter = ELEMENT(doc, '.c-entry-content')
RETURN texter.innerText
2 changes: 1 addition & 1 deletion examples/disable-images.fql
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ LET p = DOCUMENT("https://www.gettyimages.com/", {
}
})

RETURN NONE
RETURN TRUE
24 changes: 13 additions & 11 deletions examples/google-search.fql
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,22 @@ LET google = DOCUMENT("https://www.google.com/", {
HOVER(google, 'input[name="q"]')
WAIT(RAND(100))
INPUT(google, 'input[name="q"]', @criteria, 30)

WAIT(RAND(100))

WAIT_ELEMENT(google, '.UUbT9')
WAIT(RAND(100))
CLICK(google, 'input[name="btnK"]')

WAIT_NAVIGATION(google)
WAITFOR EVENT "navigation" IN google

WAIT_ELEMENT(google, "#res")

FOR el IN ELEMENTS(google, '#kp-wp-tab-overview > [jsdata]')
// filter out extra elements like media and 'People also ask'
FILTER ELEMENT_EXISTS(el, "#media_result_group") == FALSE
FILTER ELEMENT_EXISTS(el, '[role="heading"]') == FALSE

LET descr = (FOR i IN ELEMENTS(el, "span") FILTER LENGTH(i.attributes) == 0 RETURN i)

FOR result IN ELEMENTS(google, '.g')
// filter out extra elements like videos and 'People also ask'
FILTER TRIM(result.attributes.class) == 'g'
RETURN {
title: INNER_TEXT(result, 'h3'),
description: INNER_TEXT(result, '.rc > div:nth-child(2) span'),
url: INNER_TEXT(result, 'cite')
title: INNER_TEXT(el, 'h3'),
description: FIRST(descr),
url: ELEMENT(el, 'a')?.attributes.href
}
2 changes: 1 addition & 1 deletion examples/history-api.fql
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ CLICK(song)
WAIT_ELEMENT(doc, ".l-listen-hero")

RETURN {
page: page.url,
current: page.url,
first: doc.url
}
12 changes: 6 additions & 6 deletions examples/pagination.fql
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,23 @@ LET baseURL = 'https://www.amazon.com/'
LET amazon = DOCUMENT(baseURL, { driver: "cdp" })

INPUT(amazon, '#twotabsearchtextbox', @criteria)
CLICK(amazon, '.nav-search-submit input[type="submit"]')
CLICK(amazon, '#nav-search-submit-button')
WAIT_NAVIGATION(amazon)

LET resultListSelector = 'div.s-result-list'
LET resultListSelector = '[data-component-type="s-search-results"]'
LET resultItemSelector = '[data-component-type="s-search-result"]'
LET nextBtnSelector = 'ul.a-pagination .a-last a'
LET nextBtnSelector = '.s-pagination-next:not(.s-pagination-disabled)'
LET priceWholeSelector = '.a-price-whole'
LET priceFracSelector = '.a-price-fraction'
LET pagers = ELEMENTS(amazon, 'ul.a-pagination li.a-disabled')
LET pagers = ELEMENTS(amazon, '.s-pagination-item.s-pagination-disabled')
LET pages = LENGTH(pagers) > 0 ? TO_INT(INNER_TEXT(LAST(pagers))) : 0

LET result = (
FOR pageNum IN 1..pages
LIMIT @pages

LET clicked = pageNum == 1 ? false : CLICK(amazon, nextBtnSelector)
LET wait = clicked ? WAIT_NAVIGATION(amazon, 10000) : false
LET waitSelector = wait ? WAIT_ELEMENT(amazon, resultListSelector) : false
LET waitSelector = clicked ? WAIT_ELEMENT(amazon, resultListSelector) : false

PRINT("page:", pageNum, "clicked", clicked)

Expand All @@ -32,6 +31,7 @@ LET result = (
LET anchor = ELEMENT(el, "a")

RETURN {
page: pageNum,
url: baseURL + anchor.attributes.href,
title: INNER_TEXT(el, 'h2'),
price
Expand Down
Loading

0 comments on commit 86d5e45

Please sign in to comment.