Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/usr/bin/env python
"""
Reformat [tables](https://github.com/trentm/python-markdown2/wiki/tables)
in a given Markdown document so that their columns are aligned.
Limitations:
- Can't handle tables where cells have a pipe.
"""
from __future__ import print_function
__version__ = "1.0.0"
import codecs
import os
from pprint import pprint, pformat
import re
import sys
from collections import defaultdict
p = print


def e(*args, **kwargs):
    """Print to stderr.

    Takes the same arguments as `print`; any caller-supplied `file` keyword
    is overridden so the output always goes to `sys.stderr`.
    """
    p(*args, **dict(kwargs, file=sys.stderr))
#---- internal support stuff
def _split_table_row(line):
    """Return the whitespace-stripped cells of one pipe-delimited table line.

    Surrounding whitespace and any leading/trailing `|` are removed before
    the line is split on `|`.
    """
    return [cell.strip() for cell in line.strip().strip('|').split('|')]


def _parse_column_aligns(underline):
    """Return one alignment per cell of a table underline row.

    Each entry is 'center' (`:--:`), 'left' (`:--`), 'right' (`--:`) or
    None (no colon marker).
    """
    aligns = []
    for cell in underline.strip('| \t\n').split('|'):
        cell = cell.strip()
        starts, ends = cell.startswith(':'), cell.endswith(':')
        if starts and ends:
            aligns.append('center')
        elif starts:
            aligns.append('left')
        elif ends:
            aligns.append('right')
        else:
            aligns.append(None)
    return aligns


def _pad_table_cell(cell, width, align):
    """Pad `cell` with spaces to `width` according to `align`."""
    if align == 'center':
        space = width - len(cell)
        # Floor division: `/` yields a float on Python 3, which cannot be
        # used to repeat a string. Any odd leftover space goes on the right.
        left = space // 2
        return u' ' * left + cell + u' ' * (space - left)
    if align == 'right':
        return cell.rjust(width)
    return cell.ljust(width)


def _underline_table_cell(width, align):
    """Return the underline-row cell (`---`, `:--`, `--:`, `:-:`) for a column."""
    if align == 'center':
        return u':' + u'-' * (width - 2) + u':'
    if align == 'right':
        return u'-' * (width - 1) + u':'
    if align == 'left':
        return u':' + u'-' * (width - 1)
    return u'-' * width


def tables_align_columns(path):
    """Return the content of the Markdown file at `path` with tables aligned.

    The file is read as UTF-8. Every table found (header row, underline row,
    one or more data rows, preceded by a blank line or the start of the
    file) is rewritten so that all cells of a column are padded to the same
    width, honouring the left/right/center markers of the underline row.
    Text outside tables is returned unchanged.

    Limitation: a table in which some row has more cells than the underline
    row (typically because a cell contains a literal `|`) is left untouched
    and a warning is printed to stderr.
    """
    def _table_sub(match):
        head, underline, body = match.groups()
        data_rows = [_split_table_row(head)]
        data_rows.extend(
            _split_table_row(line) for line in body.strip('\n').split('\n'))
        aligns = _parse_column_aligns(underline)

        if any(len(row) > len(aligns) for row in data_rows):
            # Limitation: We hit a table row where a cell has a literal `|`
            # in it. We can't currently handle that, so just skip this table.
            e('tables-align-columns: warning: skipping a table '
              'with literal `|`: %r' % match.group(0))
            return match.group(0)

        widths = []
        for col_idx, align in enumerate(aligns):
            # A centered underline needs at least `:-:` (3 chars); with a
            # width of 2 it would degenerate to `::`, which is not a valid
            # underline cell. Other alignments fit in 2 (`--`, `:-`, `-:`).
            min_width = 3 if align == 'center' else 2
            cell_lens = [len(row[col_idx]) for row in data_rows
                         if col_idx < len(row)]
            widths.append(max([min_width] + cell_lens))

        table = [
            [_pad_table_cell(cell, widths[col_idx], aligns[col_idx])
             for col_idx, cell in enumerate(row)]
            for row in data_rows
        ]
        # The underline row has as many cells as the header row and goes
        # directly below it.
        rule = [_underline_table_cell(widths[col_idx], aligns[col_idx])
                for col_idx in range(len(data_rows[0]))]
        table.insert(1, rule)

        table_str = u'\n'.join(
            (u'| ' + u' | '.join(row) + u' |') for row in table)
        return table_str + u'\n'

    # Use a context manager so the file handle is closed deterministically.
    with codecs.open(path, 'rb', 'utf8') as f:
        text = f.read()

    less_than_tab = 3
    table_re = re.compile(r'''
(?:(?<=\n\n)|\A\n?) # leading blank line
^[ ]{0,%d} # allowed whitespace
(.*[|].*) \n # $1: header row (at least one pipe)
^[ ]{0,%d} # allowed whitespace
( # $2: underline row
# underline row with leading bar
(?: \|\ *:?-+:?\ * )+ \|? \n
|
# or, underline row without leading bar
(?: \ *:?-+:?\ *\| )+ (?: \ *:?-+:?\ * )? \n
)
( # $3: data rows
(?:
^[ ]{0,%d}(?!\ ) # ensure line begins with 0 to less_than_tab spaces
.*\|.* \n
)+
)
''' % (less_than_tab, less_than_tab, less_than_tab), re.M | re.X)
    return table_re.sub(_table_sub, text)
#---- mainline
def main(argv):
    """Align the tables of each file named in `argv[1:]` and write to stdout.

    `argv[0]` (the program name) is ignored. Output is encoded with stdout's
    encoding, falling back to UTF-8 when stdout has none; characters that
    encoding cannot represent are emitted as XML character references.
    """
    # The encoded output is bytes. On Python 3 bytes must go to the binary
    # layer (`sys.stdout.buffer`); Python 2's `sys.stdout` has no `buffer`
    # attribute and accepts bytes directly.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    encoding = sys.stdout.encoding or "utf-8"
    for path in argv[1:]:
        text = tables_align_columns(path)
        out.write(text.encode(encoding, 'xmlcharrefreplace'))
    out.flush()
# Script entry point: when executed directly (not imported), run main() on
# the command-line arguments and pass its return value to sys.exit().
if __name__ == "__main__":
    sys.exit( main(sys.argv) )