-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
176 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# gurl http extensions | ||
|
||
## awsapi | ||
Enables AWS Signature V4 for HTTP I/O. Allows to use AWS API Gateway with IAM authentication. | ||
|
||
## xhtml | ||
Enables fetching and parsing xHTML content. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
module github.com/fogfish/gurl/http/x/xhtml | ||
|
||
go 1.21.0 | ||
|
||
require ( | ||
github.com/PuerkitoBio/goquery v1.9.1 | ||
github.com/fogfish/gurl/v2 v2.9.0 | ||
github.com/fogfish/it/v2 v2.0.1 | ||
) | ||
|
||
require ( | ||
github.com/ajg/form v1.5.2-0.20200323032839-9aeb3cf462e1 // indirect | ||
github.com/andybalholm/cascadia v1.3.2 // indirect | ||
github.com/google/go-cmp v0.6.0 // indirect | ||
golang.org/x/net v0.21.0 // indirect | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
github.com/PuerkitoBio/goquery v1.9.1 h1:mTL6XjbJTZdpfL+Gwl5U2h1l9yEkJjhmlTeV9VPW7UI= | ||
github.com/PuerkitoBio/goquery v1.9.1/go.mod h1:cW1n6TmIMDoORQU5IU/P1T3tGFunOeXEpGP2WHRwkbY= | ||
github.com/ajg/form v1.5.2-0.20200323032839-9aeb3cf462e1 h1:8Qzi+0Uch1VJvdrOhJ8U8FqoPLbUdETPgMqGJ6DSMSQ= | ||
github.com/ajg/form v1.5.2-0.20200323032839-9aeb3cf462e1/go.mod h1:uL1WgH+h2mgNtvBq0339dVnzXdBETtL2LeUXaIv25UY= | ||
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss= | ||
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= | ||
github.com/fogfish/gurl/v2 v2.9.0 h1:IZlOxZte9y+NFgKHkPYw5jS0VCNG9avu2Ywrm2c3S6k= | ||
github.com/fogfish/gurl/v2 v2.9.0/go.mod h1:vBqw+SCrfOPNllWDCwPnuotrNeuuyTsYZ28iP13qF3Y= | ||
github.com/fogfish/it/v2 v2.0.1 h1:vu3kV2xzYDPHoMHMABxXeu5CoMcTfRc4gkWkzOUkRJY= | ||
github.com/fogfish/it/v2 v2.0.1/go.mod h1:h5FdKaEQT4sUEykiVkB8VV4jX27XabFVeWhoDZaRZtE= | ||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= | ||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= | ||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= | ||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= | ||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= | ||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= | ||
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= | ||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= | ||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= | ||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= | ||
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= | ||
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= | ||
golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4= | ||
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= | ||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= | ||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= | ||
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= | ||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | ||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | ||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= | ||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= | ||
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= | ||
golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= | ||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= | ||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= | ||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= | ||
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= | ||
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= | ||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= | ||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= | ||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= | ||
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= | ||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
// | ||
// Copyright (C) 2019 - 2024 Dmitry Kolesnikov | ||
// | ||
// This file may be modified and distributed under the terms | ||
// of the MIT license. See the LICENSE file for details. | ||
// https://github.com/fogfish/gurl | ||
// | ||
|
||
// Package xhtml is an extension to gurl library for fetching and parsing | ||
// html using query selectors. | ||
package xhtml | ||
|
||
import ( | ||
"bytes" | ||
"context" | ||
|
||
"github.com/PuerkitoBio/goquery" | ||
"github.com/fogfish/gurl/v2/http" | ||
ø "github.com/fogfish/gurl/v2/http/recv" | ||
ƒ "github.com/fogfish/gurl/v2/http/send" | ||
) | ||
|
||
// HTTP Arrow receive and parses HTML content | ||
func Content(content **goquery.Selection) http.Arrow { | ||
b := &bytes.Buffer{} | ||
|
||
return http.Join( | ||
ø.Bytes(b), | ||
func(ctx *http.Context) error { | ||
doc, err := goquery.NewDocumentFromReader(b) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
c := doc.Selection | ||
*content = c | ||
|
||
return nil | ||
}, | ||
) | ||
} | ||
|
||
// HTTP Client tailored for fetching HTML content from public resources | ||
type Client struct { | ||
http.Stack | ||
} | ||
|
||
// Create new instance of HTTP Client tailored for fetching HTML content | ||
// from public resources. | ||
func New(opts ...http.Config) Client { | ||
return Client{Stack: http.New(opts...)} | ||
} | ||
|
||
// Fetches HTML content behind url, parser HTML. | ||
func (site Client) Fetch(ctx context.Context, url string) (*goquery.Selection, error) { | ||
var content *goquery.Selection | ||
|
||
err := site.Stack.IO(context.Background(), | ||
http.GET( | ||
ƒ.URI(url), | ||
ƒ.Accept.HTML, | ||
|
||
ø.Status.OK, | ||
Content(&content), | ||
), | ||
) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
return content, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
// | ||
// Copyright (C) 2019 - 2024 Dmitry Kolesnikov | ||
// | ||
// This file may be modified and distributed under the terms | ||
// of the MIT license. See the LICENSE file for details. | ||
// https://github.com/fogfish/gurl | ||
// | ||
|
||
package xhtml_test | ||
|
||
import ( | ||
"context" | ||
"testing" | ||
|
||
"github.com/fogfish/gurl/http/x/xhtml" | ||
"github.com/fogfish/gurl/v2/http/mock" | ||
"github.com/fogfish/it/v2" | ||
) | ||
|
||
func TestFetch(t *testing.T) { | ||
web := mock.New( | ||
mock.Header("Content-Type", "text/html"), | ||
mock.Body([]byte("<html><body><div>Hello World</div></body></html>")), | ||
) | ||
|
||
cli := xhtml.New(web) | ||
c, err := cli.Fetch(context.Background(), "http://example.com/") | ||
|
||
it.Then(t).Should( | ||
it.Nil(err), | ||
it.Equal(c.Find("div").Text(), "Hello World"), | ||
) | ||
} |