[pre=#0C1021]if bulletin_board:
# print css
# Accumulator for the BBCode output fragments, seeded with the opening [pre]
# tag that carries the block's background colour (black when none is set).
# NOTE(review): `css` is defined outside this excerpt; css[None] is presumably
# the default (selector-less) list of (property, value) pairs -- confirm.
# The straight quotes and the list/subscript brackets were restored here; the
# forum's BBCode renderer had mangled them in the posted copy.
bbcode = ["[pre=%s]" % dict(css[None]).get('background-color', '#000000')]
A = bbcode.append  # short alias used by the conversion loop to emit a fragment
from lxml.html import fromstring
import re
# Matches the start of anything that looks like a BBCode tag: "[name" or
# "[/name".  Only the opening part is matched; the rest of the tag stays in
# the following plain-text token.
# NOTE(review): the posted pattern read r'(\\w+|\/\w+)'; the "[" characters
# appear to have been eaten by the forum's BBCode renderer -- confirm this
# restoration against the original script.
TAG = re.compile(r'(\[\w+|\[/\w+)', re.M | re.S)


def crude_tokenizer(text):  # TODO: this would be a better algorithm for `inversion_stream`
    """Split ``text`` into tag-looking tokens and the plain runs between them.

    Yields the pieces of ``text`` in order: every ``TAG`` match is yielded as
    its own token, and any text before, between or after matches is yielded
    unchanged.  Empty pieces are never yielded, and joining all yielded
    pieces gives back ``text`` exactly.
    """
    last_end = 0
    for match in TAG.finditer(text):
        start, end = match.span()
        if start != last_end:
            # Plain text sitting between the previous match and this one.
            yield text[last_end:start]
        yield text[start:end]
        last_end = end
    if last_end < len(text):
        # Trailing text after the final match (or the whole input when
        # nothing matched).
        yield text[last_end:]
# Convert every text node of the highlighted HTML into coloured BBCode.
# Each node is split with crude_tokenizer and every piece is wrapped in its
# own [color] element, so tag-looking text in the source is broken up.
# NOTE(review): the regex character class, css[None], the [color] format
# string and the closing "[/pre]" were restored from a copy mangled by the
# forum's BBCode renderer -- confirm against the original script.
for txt in fromstring(html).xpath('//text()'):
    if not (txt.is_tail or txt.is_text):
        continue

    parent = txt.getparent()
    if txt.is_tail:
        # For tail text getparent() is the element the tail hangs off, so
        # start the style search one level further up.
        parent = parent.getparent()

    # Climb the ancestors until one carries an inline style.
    style = None
    while parent is not None:
        style = parent.get('style')
        if style:
            break
        parent = parent.getparent()

    # `style` is None/empty when no ancestor is styled.  The original called
    # parent.get('style') here, which raised AttributeError once the climb
    # had run off the top of the tree (parent is None).
    color = re.search(r'(?<!background-)color:(#[A-Fa-f0-9]+)', style or '')
    if color:
        color = color.group(1)
    else:
        # Fall back to the default foreground colour, black if none is set.
        color = dict(css[None]).get('color', '#000000')

    for token in crude_tokenizer(txt):
        A("[color=%s]%s[/color]" % (color, token))

# Close the [pre] element and assemble the final BBCode string.
html = "".join(bbcode + ["[/pre]"])
[/pre]
That seems to work: it makes sure to wrap anything that looks like a tag in its own BBCode element, so as to break it up.