diff --git a/opengraph/opengraph.py b/opengraph/opengraph.py index 9edbdd5..9cedb97 100644 --- a/opengraph/opengraph.py +++ b/opengraph/opengraph.py @@ -18,7 +18,7 @@ class OpenGraph(dict): """ """ - required_attrs = ['title', 'type', 'image', 'url'] + required_attrs = ['title', 'type', 'image', 'url', 'description'] def __init__(self, url=None, html=None, scrape=False, **kwargs): # If scrape == True, then will try to fetch missing attribtues @@ -65,14 +65,17 @@ def parser(self, html): # Couldn't fetch all attrs from og tags, try scraping body if not self.is_valid() and self.scrape: for attr in self.required_attrs: - if not hasattr(self, attr): + if not self.valid_attr(attr): try: self[attr] = getattr(self, 'scrape_%s' % attr)(doc) except AttributeError: pass - + + def valid_attr(self, attr): + return hasattr(self, attr) and len(self[attr]) > 0 + def is_valid(self): - return all([hasattr(self, attr) for attr in self.required_attrs]) + return all([self.valid_attr(attr) for attr in self.required_attrs]) def to_html(self): if not self.is_valid(): @@ -100,7 +103,7 @@ def to_xml(self): pass def scrape_image(self, doc): - images = [dict(img.attrs)['src'] + images = [dict(img.attrs)['src'] for img in doc.html.body.findAll('img')] if images: @@ -115,4 +118,9 @@ def scrape_type(self, doc): return 'other' def scrape_url(self, doc): - return self._url \ No newline at end of file + return self._url + + def scrape_description(self, doc): + tag = doc.html.head.findAll('meta', attrs={"name":"description"}) + result = "".join([t['content'] for t in tag]) + return result \ No newline at end of file diff --git a/opengraph/test.py b/opengraph/test.py index 5e3a2bc..3835b8f 100644 --- a/opengraph/test.py +++ b/opengraph/test.py @@ -44,10 +44,7 @@ def test_is_valid(self): og = opengraph.OpenGraph(url='http://grooveshark.com') self.assertTrue(og.is_valid()) - def test_is_not_valid(self): - og = opengraph.OpenGraph(url='http://vdubmexico.com') - self.assertFalse(og.is_valid()) - + if __name__ == '__main__': unittest.main()