unicode - Python: 'ascii' codec can't encode character u'\\u2026' -


i trying use bing api in python following code:

#!/usr/bin/python bingapi import bingapi   import re import json import urllib import cgi import cgitb htmlparser import htmlparser  class mlstripper(htmlparser):     def __init__(self):             self.reset()             self.fed = []     def handle_data(self, d):             self.fed.append(d)     def get_data(self):             return ''.join(self.fed)  def strip_tags(html):     s = mlstripper()     s.feed(html)     return s.get_data()  def strip_tags2(data):     p = re.compile(r'<[^<]*?>')     q = re.compile(r'[&;!@#$%^*()]*')     data = p.sub('', data)     return q.sub('', data)  def geturl(item):     return item['url']  def getcontent(item):     return item['description']  def gettitle(item):     return item['title']  def getinfo(qry, sitestr):     qrystr = qry + "+" + sitestr     #qrystr = u"%s" % qrystr.encode('utf-8')     query = urllib.urlencode({'q' : qrystr})     url = 'http://api.bing.net/json.aspx?appid=<myappid>&version=2.2&market=en-us&query=%s&sources=web&web.count=10&jsontype=raw' % (query)     search_results = urllib.urlopen(url)     j = json.loads(search_results.read())     results = j['searchresponse']['web']['results']     return results  def updaterecent(qry):     f = open("recent.txt", "r")     lines = f.readlines()     f.close()     lines = lines[1:]      if len(qry) > 50: #truncate if string long             qry = (qry[:50] + '...')     qry = strip_tags2(qry) #strip out html if injection try      lines.append("\n%s" % qry)     f = open("recent.txt", "w")     f.writelines(lines)     f.close()  if __name__ == '__main__':     form = cgi.fieldstorage()     qry = form["qry"].value     qry = r'%s' % qry      updaterecent(qry)      sitestr = "(site:answers.yahoo.com or site:chacha.com or site:blurtit.com or site:answers.com or site:question.com or site:answerbag.com or site:stackexchange.com)"      print "content-type: text/html"     print      header = open("header.html", "r")     contents = header.readlines()     header.close()     item in contents:             print item      print """     <div id="results">     <center><h1>results:</h1></center>     """     item in getinfo(sitestr, qry):             print "<h3>%s</h3>" % gettitle(item)             print "<br />"             print "%s" % geturl(item)             print "<br />"             print "<p style=\"color:gray\">%s</p>" % getcontent(item)             print "<br />"     print "</div>"      footer = open("footer.html", "r")     contents = footer.readlines()     footer.close()     thing in contents:             print thing 

i prints few results, , gives me following error:

unicodeencodeerror: 'ascii' codec can't encode character u'\\u2026' in position 72:    ordinal not in range(128) 

can explain why happening? has how url getting encoded, wrong? in advance!

that particular unicode character "horizontal ellipsis". 1 or more of getxxxxx() functions returning unicode strings, 1 of contains non-ascii character. suggest declaring encoding of output, example:

content-type: text/html; charset=utf-8 

and explicitly encoding output in encoding.


Comments

Popular posts from this blog

javascript - Enclosure Memory Copies -

php - Replacing tags in braces, even nested tags, with regex -