
# Lookup table: XHTML 1.0 named character entity -> unicode character.
#
# Each key is the full entity reference as it appears in markup, including
# the leading "&" and the trailing ";".  Each value is a one-character
# unicode string.
#
# The five markup-significant entities are intentionally left out:
#   lt (<), gt (>), amp (&), quot ("), and apos (')
#
# Works on Python 2.2; earlier versions are untested.
xhtml_entities = {
    "&zwnj;": u"\u200c",
    "&aring;": u"\u00e5",
    "&yen;": u"\u00a5",
    "&ograve;": u"\u00f2",
    "&Chi;": u"\u03a7",
    "&delta;": u"\u03b4",
    "&rang;": u"\u232a",
    "&sup;": u"\u2283",
    "&trade;": u"\u2122",
    "&Ntilde;": u"\u00d1",
    "&xi;": u"\u03be",
    "&upsih;": u"\u03d2",
    "&nbsp;": u"\u00a0",
    "&Atilde;": u"\u00c3",
    "&radic;": u"\u221a",
    "&otimes;": u"\u2297",
    "&nabla;": u"\u2207",
    "&aelig;": u"\u00e6",
    "&oelig;": u"\u0153",
    "&equiv;": u"\u2261",
    "&lArr;": u"\u21d0",
    "&infin;": u"\u221e",
    "&Psi;": u"\u03a8",
    "&auml;": u"\u00e4",
    "&circ;": u"\u02c6",
    "&Epsilon;": u"\u0395",
    "&otilde;": u"\u00f5",
    "&Icirc;": u"\u00ce",
    "&Eacute;": u"\u00c9",
    "&ndash;": u"\u2013",
    "&sbquo;": u"\u201a",
    "&Prime;": u"\u2033",
    "&prime;": u"\u2032",
    "&psi;": u"\u03c8",
    "&Kappa;": u"\u039a",
    "&rsaquo;": u"\u203a",
    "&Tau;": u"\u03a4",
    "&uacute;": u"\u00fa",
    "&ocirc;": u"\u00f4",
    "&lrm;": u"\u200e",
    "&lceil;": u"\u2308",
    "&cedil;": u"\u00b8",
    "&Alpha;": u"\u0391",
    "&not;": u"\u00ac",
    "&Dagger;": u"\u2021",
    "&AElig;": u"\u00c6",
    "&ni;": u"\u220b",
    "&oslash;": u"\u00f8",
    "&acute;": u"\u00b4",
    "&zwj;": u"\u200d",
    "&alefsym;": u"\u2135",
    "&laquo;": u"\u00ab",
    "&shy;": u"\u00ad",
    "&rdquo;": u"\u201d",
    "&ge;": u"\u2265",
    "&Igrave;": u"\u00cc",
    "&nu;": u"\u03bd",
    "&Ograve;": u"\u00d2",
    "&lsaquo;": u"\u2039",
    "&sube;": u"\u2286",
    "&rarr;": u"\u2192",
    "&sdot;": u"\u22c5",
    "&supe;": u"\u2287",
    "&Yacute;": u"\u00dd",
    "&lfloor;": u"\u230a",
    "&rlm;": u"\u200f",
    "&Auml;": u"\u00c4",
    "&brvbar;": u"\u00a6",
    "&Otilde;": u"\u00d5",
    "&szlig;": u"\u00df",
    "&clubs;": u"\u2663",
    "&diams;": u"\u2666",
    "&agrave;": u"\u00e0",
    "&Ocirc;": u"\u00d4",
    "&Iota;": u"\u0399",
    "&Theta;": u"\u0398",
    "&Pi;": u"\u03a0",
    "&OElig;": u"\u0152",
    "&Scaron;": u"\u0160",
    "&frac14;": u"\u00bc",
    "&egrave;": u"\u00e8",
    "&sub;": u"\u2282",
    "&iexcl;": u"\u00a1",
    "&frac12;": u"\u00bd",
    "&ordf;": u"\u00aa",
    "&sum;": u"\u2211",
    "&prop;": u"\u221d",
    "&Uuml;": u"\u00dc",
    "&ntilde;": u"\u00f1",
    "&atilde;": u"\u00e3",
    "&asymp;": u"\u2248",
    "&uml;": u"\u00a8",
    "&prod;": u"\u220f",
    "&nsub;": u"\u2284",
    "&reg;": u"\u00ae",
    "&rArr;": u"\u21d2",
    "&Oslash;": u"\u00d8",
    "&THORN;": u"\u00de",
    "&yuml;": u"\u00ff",
    "&aacute;": u"\u00e1",
    "&Mu;": u"\u039c",
    "&hArr;": u"\u21d4",
    "&le;": u"\u2264",
    "&thinsp;": u"\u2009",
    "&dArr;": u"\u21d3",
    "&ecirc;": u"\u00ea",
    "&bdquo;": u"\u201e",
    "&Sigma;": u"\u03a3",
    "&kappa;": u"\u03ba",
    "&Aring;": u"\u00c5",
    "&tilde;": u"\u02dc",
    "&emsp;": u"\u2003",
    "&mdash;": u"\u2014",
    "&uarr;": u"\u2191",
    "&times;": u"\u00d7",
    "&Ugrave;": u"\u00d9",
    "&Eta;": u"\u0397",
    "&Agrave;": u"\u00c0",
    "&chi;": u"\u03c7",
    "&real;": u"\u211c",
    "&eth;": u"\u00f0",
    "&rceil;": u"\u2309",
    "&iuml;": u"\u00ef",
    "&gamma;": u"\u03b3",
    "&lambda;": u"\u03bb",
    "&harr;": u"\u2194",
    "&Egrave;": u"\u00c8",
    "&frac34;": u"\u00be",
    "&dagger;": u"\u2020",
    "&divide;": u"\u00f7",
    "&Ouml;": u"\u00d6",
    "&image;": u"\u2111",
    "&hellip;": u"\u2026",
    "&igrave;": u"\u00ec",
    "&Yuml;": u"\u0178",
    "&ang;": u"\u2220",
    "&alpha;": u"\u03b1",
    "&frasl;": u"\u2044",
    "&ETH;": u"\u00d0",
    "&lowast;": u"\u2217",
    "&Nu;": u"\u039d",
    "&plusmn;": u"\u00b1",
    "&bull;": u"\u2022",
    "&sup1;": u"\u00b9",
    "&sup2;": u"\u00b2",
    "&sup3;": u"\u00b3",
    "&Aacute;": u"\u00c1",
    "&cent;": u"\u00a2",
    "&oline;": u"\u203e",
    "&Beta;": u"\u0392",
    "&perp;": u"\u22a5",
    "&Delta;": u"\u0394",
    "&there4;": u"\u2234",
    "&pi;": u"\u03c0",
    "&iota;": u"\u03b9",
    "&scaron;": u"\u0161",
    "&euml;": u"\u00eb",
    "&notin;": u"\u2209",
    "&iacute;": u"\u00ed",
    "&para;": u"\u00b6",
    "&epsilon;": u"\u03b5",
    "&weierp;": u"\u2118",
    "&uuml;": u"\u00fc",
    "&larr;": u"\u2190",
    "&icirc;": u"\u00ee",
    "&Upsilon;": u"\u03a5",
    "&omicron;": u"\u03bf",
    "&upsilon;": u"\u03c5",
    "&copy;": u"\u00a9",
    "&Iuml;": u"\u00cf",
    "&Oacute;": u"\u00d3",
    "&Xi;": u"\u039e",
    "&ensp;": u"\u2002",
    "&ccedil;": u"\u00e7",
    "&Ucirc;": u"\u00db",
    "&cap;": u"\u2229",
    "&mu;": u"\u03bc",
    "&empty;": u"\u2205",
    "&lsquo;": u"\u2018",
    "&isin;": u"\u2208",
    "&Zeta;": u"\u0396",
    "&minus;": u"\u2212",
    "&loz;": u"\u25ca",
    "&deg;": u"\u00b0",
    "&and;": u"\u2227",
    "&tau;": u"\u03c4",
    "&pound;": u"\u00a3",
    "&curren;": u"\u00a4",
    "&int;": u"\u222b",
    "&ucirc;": u"\u00fb",
    "&rfloor;": u"\u230b",
    "&crarr;": u"\u21b5",
    "&ugrave;": u"\u00f9",
    "&exist;": u"\u2203",
    "&cong;": u"\u2245",
    "&theta;": u"\u03b8",
    "&oplus;": u"\u2295",
    "&permil;": u"\u2030",
    "&Acirc;": u"\u00c2",
    "&piv;": u"\u03d6",
    "&Euml;": u"\u00cb",
    "&Phi;": u"\u03a6",
    "&Iacute;": u"\u00cd",
    "&Uacute;": u"\u00da",
    "&Omicron;": u"\u039f",
    "&ne;": u"\u2260",
    "&iquest;": u"\u00bf",
    "&eta;": u"\u03b7",
    "&yacute;": u"\u00fd",
    "&Rho;": u"\u03a1",
    "&darr;": u"\u2193",
    "&Ecirc;": u"\u00ca",
    "&zeta;": u"\u03b6",
    "&Omega;": u"\u03a9",
    "&acirc;": u"\u00e2",
    "&sim;": u"\u223c",
    "&phi;": u"\u03c6",
    "&sigmaf;": u"\u03c2",
    "&macr;": u"\u00af",
    "&thetasym;": u"\u03d1",
    "&Ccedil;": u"\u00c7",
    "&ordm;": u"\u00ba",
    "&uArr;": u"\u21d1",
    "&forall;": u"\u2200",
    "&beta;": u"\u03b2",
    "&fnof;": u"\u0192",
    "&cup;": u"\u222a",
    "&rho;": u"\u03c1",
    "&micro;": u"\u00b5",
    "&eacute;": u"\u00e9",
    "&omega;": u"\u03c9",
    "&middot;": u"\u00b7",
    "&Gamma;": u"\u0393",
    "&euro;": u"\u20ac",
    "&lang;": u"\u2329",
    "&spades;": u"\u2660",
    "&rsquo;": u"\u2019",
    "&thorn;": u"\u00fe",
    "&ouml;": u"\u00f6",
    "&or;": u"\u2228",
    "&raquo;": u"\u00bb",
    "&Lambda;": u"\u039b",
    "&part;": u"\u2202",
    "&sect;": u"\u00a7",
    "&ldquo;": u"\u201c",
    "&hearts;": u"\u2665",
    "&sigma;": u"\u03c3",
    "&oacute;": u"\u00f3",
}

