- charset = webpage.info().get_content_charset()
- if not charset:
- notice("TROUBLE READING PAGE TITLE: no charset in header")
- continue
- content_type = webpage.info().get_content_type()
- if content_type not in ('text/html', 'text/xml',
- 'application/xhtml+xml'):
- notice("TROUBLE READING PAGE TITLE: bad content type "
- + content_type)
- continue
- content = webpage.read().decode(charset)
- title = HTMLParser(content, "title").data
- title = html.unescape(title)
- notice("PAGE TITLE FOR URL: " + title)
+ title = bs4.BeautifulSoup(r.text).title
+ if title:
+ notice("PAGE TITLE FOR URL: " + title.string.strip())
+ else:
+ notice("PAGE HAS NO TITLE TAG")