diff --git a/pkg/registry/meta.go b/pkg/registry/meta.go
index 9cc511b2..843315db 100644
--- a/pkg/registry/meta.go
+++ b/pkg/registry/meta.go
@@ -33,7 +33,10 @@ const (
 	keyPageTitle = "page_title"
 )
 
-var parentheses = regexp.MustCompile(`\(([^\)]+)\)`)
+var (
+	parentheses             = regexp.MustCompile(`\(([^\)]+)\)`)
+	regexConfigurationBlock = regexp.MustCompile(`block.*support`)
+)
 
 // NewProviderMetadata initializes a new ProviderMetadata for
 // extracting metadata from the Terraform registry.
@@ -271,6 +274,90 @@ func getDescription(s string) string {
 	return s
 }
 
+// getRootPath returns the dot-separated path of the configuration blocks
+// enclosing the node n, or "" if no enclosing block is found. The closest
+// "ul" ancestor of n is located first, then the closest preceding sibling of
+// that list which is a block paragraph (see checkBlockParagraph). The text of
+// the paragraph's first "code" child is the block name. If the list preceding
+// the paragraph has an item for that name, the path of that item is resolved
+// recursively and prepended to the name.
+func getRootPath(n *html.Node) string {
+	// find the closest list containing n
+	ulNode := n.Parent
+	for ulNode != nil && ulNode.Data != "ul" {
+		ulNode = ulNode.Parent
+	}
+	if ulNode == nil {
+		return ""
+	}
+	// walk backwards over the siblings to the paragraph introducing the block
+	pNode := ulNode
+	for pNode != nil && (pNode.Data != "p" || !checkBlockParagraph(pNode)) {
+		pNode = pNode.PrevSibling
+	}
+	if pNode == nil {
+		return ""
+	}
+	// the block name is the text of the paragraph's first code child
+	codeNode := pNode.FirstChild
+	for codeNode != nil && codeNode.Data != "code" {
+		codeNode = codeNode.NextSibling
+	}
+	if codeNode == nil || codeNode.FirstChild == nil {
+		return ""
+	}
+	name := codeNode.FirstChild.Data
+	prevLiNode := getPrevLiWithCodeText(name, pNode)
+	if prevLiNode == nil {
+		return name
+	}
+	// the block is itself an item of an earlier list: resolve its parents
+	root := getRootPath(prevLiNode)
+	if len(root) == 0 {
+		return name
+	}
+	return fmt.Sprintf("%s.%s", root, name)
+}
+
+// getPrevLiWithCodeText returns the list item of the closest "ul" sibling
+// preceding pNode whose first child is a "code" node with the text codeText.
+// It returns nil if there is no such list or list item.
+func getPrevLiWithCodeText(codeText string, pNode *html.Node) *html.Node {
+	ulNode := pNode.PrevSibling
+	for ulNode != nil && ulNode.Data != "ul" {
+		ulNode = ulNode.PrevSibling
+	}
+	if ulNode == nil {
+		return nil
+	}
+	for liNode := ulNode.FirstChild; liNode != nil; liNode = liNode.NextSibling {
+		if liNode.Data != "li" {
+			continue
+		}
+		codeNode := liNode.FirstChild
+		// a code node without a text child (e.g. an empty <code></code>)
+		// cannot match, skip it instead of dereferencing a nil node
+		if codeNode == nil || codeNode.Data != "code" || codeNode.FirstChild == nil {
+			continue
+		}
+		if codeNode.FirstChild.Data == codeText {
+			return liNode
+		}
+	}
+	return nil
+}
+
+// checkBlockParagraph reports whether the Data of any direct child of the
+// paragraph node p matches regexConfigurationBlock.
+func checkBlockParagraph(p *html.Node) bool {
+	for c := p.FirstChild; c != nil; c = c.NextSibling {
+		if regexConfigurationBlock.MatchString(c.Data) {
+			return true
+		}
+	}
+	return false
+}
+
 func (r *Resource) scrapeDocString(n *html.Node, attrName *string, processed map[*html.Node]struct{}) string {
 	if _, ok := processed[n]; ok {
 		return ""
@@ -284,6 +371,9 @@ func (r *Resource) scrapeDocString(n *html.Node, attrName *string, processed map
 	sb := strings.Builder{}
 	if *attrName == "" {
 		*attrName = n.Data
+		if root := getRootPath(n); len(root) != 0 {
+			*attrName = fmt.Sprintf("%s.%s", root, *attrName)
+		}
 	} else {
 		sb.WriteString(n.Data)
 	}