unicode - Python: 'ascii' codec can't encode character u'\\u2026' -
i trying use bing api in python following code:
#!/usr/bin/python bingapi import bingapi import re import json import urllib import cgi import cgitb htmlparser import htmlparser class mlstripper(htmlparser): def __init__(self): self.reset() self.fed = [] def handle_data(self, d): self.fed.append(d) def get_data(self): return ''.join(self.fed) def strip_tags(html): s = mlstripper() s.feed(html) return s.get_data() def strip_tags2(data): p = re.compile(r'<[^<]*?>') q = re.compile(r'[&;!@#$%^*()]*') data = p.sub('', data) return q.sub('', data) def geturl(item): return item['url'] def getcontent(item): return item['description'] def gettitle(item): return item['title'] def getinfo(qry, sitestr): qrystr = qry + "+" + sitestr #qrystr = u"%s" % qrystr.encode('utf-8') query = urllib.urlencode({'q' : qrystr}) url = 'http://api.bing.net/json.aspx?appid=<myappid>&version=2.2&market=en-us&query=%s&sources=web&web.count=10&jsontype=raw' % (query) search_results = urllib.urlopen(url) j = json.loads(search_results.read()) results = j['searchresponse']['web']['results'] return results def updaterecent(qry): f = open("recent.txt", "r") lines = f.readlines() f.close() lines = lines[1:] if len(qry) > 50: #truncate if string long qry = (qry[:50] + '...') qry = strip_tags2(qry) #strip out html if injection try lines.append("\n%s" % qry) f = open("recent.txt", "w") f.writelines(lines) f.close() if __name__ == '__main__': form = cgi.fieldstorage() qry = form["qry"].value qry = r'%s' % qry updaterecent(qry) sitestr = "(site:answers.yahoo.com or site:chacha.com or site:blurtit.com or site:answers.com or site:question.com or site:answerbag.com or site:stackexchange.com)" print "content-type: text/html" print header = open("header.html", "r") contents = header.readlines() header.close() item in contents: print item print """ <div id="results"> <center><h1>results:</h1></center> """ item in getinfo(sitestr, qry): print "<h3>%s</h3>" % gettitle(item) print "<br />" print "%s" % geturl(item) print "<br />" print "<p style=\"color:gray\">%s</p>" % getcontent(item) print "<br />" print "</div>" footer = open("footer.html", "r") contents = footer.readlines() footer.close() thing in contents: print thing
i prints few results, , gives me following error:
unicodeencodeerror: 'ascii' codec can't encode character u'\\u2026' in position 72: ordinal not in range(128)
can explain why happening? has how url getting encoded, wrong? in advance!
that particular unicode character "horizontal ellipsis". 1 or more of getxxxxx() functions returning unicode strings, 1 of contains non-ascii character. suggest declaring encoding of output, example:
content-type: text/html; charset=utf-8
and explicitly encoding output in encoding.
Comments
Post a Comment