forked from ethanchewy/PythonBuddy
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgraphparse.py
More file actions
160 lines (145 loc) · 5.32 KB
/
graphparse.py
File metadata and controls
160 lines (145 loc) · 5.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
"""
Graph file parsing.
"""
import sys, re
import subprocess
import msgstruct
# Matches a run of characters other than ASCII letters, digits, '_' and '.'.
# parse_plain() splits label text on it to isolate candidate link words; the
# capturing group makes re.split() return the separators as well.
re_nonword = re.compile(r'([^0-9a-zA-Z_.]+)')
# First line of Graphviz "plain" output: "graph <number> <number> <number>".
re_plain = re.compile(r'graph [-0-9.]+ [-0-9.]+ [-0-9.]+$', re.MULTILINE)
# The keywords that introduce a graph in dot source (case-insensitive).
re_digraph = re.compile(r'\b(graph|digraph)\b', re.IGNORECASE)


def guess_type(content):
    """Guess how *content* has to be processed.

    Returns one of:
      'plain'   -- content already looks like a .plain file
      'dot'     -- the 'digraph' keyword was found (directed graph)
      'neato'   -- the 'graph' keyword was found (undirected graph)
      'unknown' -- nothing was recognised; a warning is written to stderr

    XXX not a perfect heuristic
    """
    if re_plain.match(content):
        return 'plain'     # already a .plain file

    # Look for the word 'graph' or 'digraph' followed by a '{'.  The brace
    # position is fixed by the first keyword; the last keyword that still
    # precedes that brace decides the type.
    bracepos = None
    lastfound = ''
    for match in re_digraph.finditer(content):
        position = match.start()
        if bracepos is None:
            bracepos = content.find('{', position)
            if bracepos < 0:
                break      # a keyword, but no opening brace after it
        elif position > bracepos:
            break          # beyond the opening brace: inside the graph body
        lastfound = match.group().lower()

    if lastfound == 'digraph':
        return 'dot'
    if lastfound == 'graph':
        return 'neato'
    # sys.stderr.write() works on Python 2 and 3, unlike "print >> sys.stderr".
    sys.stderr.write("Warning: could not guess file type, using 'dot'\n")
    return 'unknown'
def dot2plain_graphviz(content, contenttype, use_codespeak=False):
    """Convert dot source to "plain" format by running a local Graphviz tool.

    content       -- the dot source, piped to the tool's standard input.
    contenttype   -- 'neato' runs ``neato -Tplain``; any other value
                     (including 'unknown') runs ``dot -Tplain``.
    use_codespeak -- unused; kept so existing callers keep working.

    Returns the tool's standard output.  Raises PlainParseError when the
    tool cannot be started or produces no output (it is then most likely
    not installed).
    """
    if contenttype != 'neato':
        program = 'dot'
    else:
        program = 'neato'

    # Pipes carry bytes.  Text input is sent as UTF-8 and the result decoded
    # back, which assumes the graph uses UTF-8 (Graphviz's documented default
    # charset).  A Python 2 'str' is already bytes and passes through as is.
    is_text = not isinstance(content, bytes)
    if is_text:
        content = content.encode('utf-8')

    close_fds = sys.platform != 'win32'
    try:
        # An argument list needs no intermediate shell process.
        p = subprocess.Popen([program, '-Tplain'], close_fds=close_fds,
                             stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    except OSError as e:
        raise PlainParseError("could not run '%s': %s" % (program, e))

    # communicate() feeds stdin and drains stdout concurrently, so large
    # graphs cannot deadlock on a full pipe, and it waits for the child so
    # no zombie process is left behind.
    plaincontent, _ = p.communicate(content)
    if not plaincontent:            # the tool is likely not installed
        raise PlainParseError("no result from running '%s'" % program)
    if is_text:
        plaincontent = plaincontent.decode('utf-8')
    return plaincontent
def dot2plain_codespeak(content, contenttype):
    """Convert dot source to "plain" format via the codespeak.net web service.

    content     -- the dot source, sent as the 'dot' field of a POST request.
    contenttype -- unused.

    Returns the body of the HTTP response.  Raises Exception when the
    response looks like an HTML error page; errors from the HTTP request
    itself propagate unchanged.
    """
    try:                                    # Python 2
        from urllib import urlencode, urlopen
    except ImportError:                     # Python 3
        from urllib.parse import urlencode
        from urllib.request import urlopen

    url = 'http://codespeak.net/pypy/convertdot.cgi'
    request = urlencode({'dot': content})
    if not isinstance(request, bytes):
        # Python 3 requires the POST body as bytes; url-encoded data is ASCII.
        request = request.encode('ascii')
    sys.stderr.write('* posting: %s\n' % url)

    g = urlopen(url, data=request)
    try:
        chunks = []
        while True:
            data = g.read(16384)
            if not data:
                break
            chunks.append(data)
    finally:
        g.close()           # release the connection even if read() fails
    plaincontent = b''.join(chunks)

    # very simple-minded way to give a somewhat better error message
    if plaincontent.startswith(b'<body'):
        raise Exception("the dot on codespeak has very likely crashed")
    if not isinstance(plaincontent, str):
        # Python 3: hand text back to the parser (assumes UTF-8 output).
        plaincontent = plaincontent.decode('utf-8')
    return plaincontent
def bkgndwrite(f, data):
    """Write *data* to the file object *f*, then close it.

    The file is closed even when write() raises (for example on a broken
    pipe), so the descriptor is never leaked; the error still propagates.
    """
    try:
        f.write(data)
    finally:
        f.close()
class PlainParseError(Exception):
    """Raised when Graphviz "plain" output is missing or cannot be parsed."""
def splitline(line, re_word = re.compile(r'[^\s"]\S*|["]["]|["].*?[^\\]["]')):
    """Break *line* into a list of words.

    Words are separated by whitespace.  A word written in double quotes may
    contain whitespace; it is decoded with ast.literal_eval(), which strips
    the quotes and interprets backslash escapes.
    """
    import ast
    return [
        ast.literal_eval(token) if token.startswith('"') else token
        for token in re_word.findall(line)
    ]
def parse_plain(graph_id, plaincontent, links={}, fixedfont=False):
    """Generate the message tuples describing a graph in "plain" format.

    graph_id     -- passed through in the CMSG_START_GRAPH message.
    plaincontent -- text of the plain file; must start with a 'graph' line.
    links        -- maps a word to either a status-bar text or a tuple
                    (status-bar text, color); color is None or a sequence
                    of three components.  Only words that occur in a node
                    or edge label are reported.  Never modified here.
    fixedfont    -- when true, a CMSG_FIXED_FONT message is emitted.

    Yields, in order: CMSG_START_GRAPH, one CMSG_ADD_NODE / CMSG_ADD_EDGE
    per record up to the 'stop' line, the CMSG_ADD_LINK messages, optionally
    CMSG_FIXED_FONT, and finally CMSG_STOP_GRAPH.

    Raises PlainParseError (while iterating) for empty input, a missing
    'graph' header, or a malformed 'node' or 'edge' record.
    """
    plaincontent = plaincontent.replace('\r\n', '\n')    # fix Windows EOL
    lines = plaincontent.splitlines(True)
    # Glue together lines continued with a trailing backslash.  Walking
    # backwards lets a chain of continuations collapse into one line.
    for i in range(len(lines)-2, -1, -1):
        if lines[i].endswith('\\\n'):     # line ending in '\'
            lines[i] = lines[i][:-2] + lines[i+1]
            del lines[i+1]

    # Empty input or a blank first line is a parse error, not an IndexError.
    header = splitline(lines.pop(0)) if lines else []
    if not header or header[0] != 'graph':
        raise PlainParseError("should start with 'graph'")
    yield (msgstruct.CMSG_START_GRAPH, graph_id) + tuple(header[1:])

    texts = []       # node and edge labels, searched for link words below
    for line in lines:
        words = splitline(line)
        if not words:
            continue                      # tolerate blank lines
        kind = words[0]
        if kind == 'node':
            if len(words) != 11:
                raise PlainParseError("bad 'node'")
            yield (msgstruct.CMSG_ADD_NODE,) + tuple(words[1:])
            texts.append(words[6])
        elif kind == 'edge':
            # words[3] is the number of control points; each one occupies
            # two words, so an optional label starts at index 4 + 2*n.
            try:
                i = 4 + 2 * int(words[3])
            except (IndexError, ValueError):
                raise PlainParseError("bad 'edge'")
            yield (msgstruct.CMSG_ADD_EDGE,) + tuple(words[1:])
            # More than two trailing words means a label is present.
            if len(words) > i + 2:
                texts.append(words[i])
        elif kind == 'stop':
            break

    if links:
        # only include the links that really appear in the graph
        seen = set()
        for text in texts:
            for word in re_nonword.split(text):
                if not word or word in seen or word not in links:
                    continue
                seen.add(word)
                t = links[word]
                if isinstance(t, tuple):
                    statusbartext, color = t
                else:
                    statusbartext, color = t, None
                if color is not None:
                    yield (msgstruct.CMSG_ADD_LINK, word,
                           statusbartext, color[0], color[1], color[2])
                else:
                    yield (msgstruct.CMSG_ADD_LINK, word, statusbartext)

    if fixedfont:
        yield (msgstruct.CMSG_FIXED_FONT,)
    yield (msgstruct.CMSG_STOP_GRAPH,)
def parse_dot(graph_id, content, links={}, fixedfont=False):
    """Parse dot or plain *content* into a list of graph messages.

    The content type is guessed with guess_type().  Content that is not
    already in plain format is first converted by dot2plain_graphviz(); the
    result is then parsed by parse_plain(), to which graph_id, links and
    fixedfont are passed unchanged.

    Returns the messages produced by parse_plain() as a list.  Raises
    PlainParseError when the conversion or the parsing fails.
    """
    contenttype = guess_type(content)
    if contenttype == 'plain':
        plaincontent = content
    else:
        # A conversion failure propagates to the caller; the remote fallback
        # dot2plain_codespeak() is deliberately not attempted.
        plaincontent = dot2plain_graphviz(content, contenttype)
    return list(parse_plain(graph_id, plaincontent, links, fixedfont))