Skip to content

Commit d8cfe40

Browse files
committed
viz and coloring nodes by cluster label
1 parent 4f42256 commit d8cfe40

10 files changed: 33 additions (+33) and 161 deletions (-161)

clustering.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ def greedy_clustering_on_graph(
1111
metric=string_similar_probability,
1212
threshold=0.8):
1313
def get_text(n):
14-
return '{} {}'.format(
14+
return u'{} {}'.format(
1515
g.node[n]['subject'], g.node[n]['body']
1616
)
1717
cluster_assignment = {}

dump_events_to_json.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,13 +40,14 @@ def run(candidate_tree_path,
4040
assignment = greedy_clustering_on_graph(e)
4141
for n in e.nodes_iter():
4242
e.node[n]['cluster_label'] = assignment[n]
43-
43+
4444
if to_original_graph:
4545
events = map(convert_to_original_graph,
4646
events)
4747

4848
d3_events = [to_d3_graph(e)
4949
for e in events]
50+
5051
json_dump(d3_events, output_path)
5152

5253

html/js/main.js

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,9 @@ $(document).ready(function(){
2525
var palette = d3.scale.ordinal()
2626
.domain([EDGE_BROADCAST, EDGE_REPLY, EDGE_RELAY])
2727
.range(d3.scale.category10().range());
28-
var format_time = d3.time.format("%Y-%m-%d");
28+
var cluster_palette = d3.scale.category20c();
2929

30+
var format_time = d3.time.format("%Y-%m-%d");
3031

3132
function get_config(mc){
3233
var url_dict = {
@@ -86,7 +87,7 @@ $(document).ready(function(){
8687
},
8788
'meta_graph': {
8889
svg: {width: 1280, height: 1500},
89-
force: {charge: -1000, linkDistance: 150},
90+
force: {charge: -500, linkDistance: 50},
9091
tip: {
9192
html: function(d){
9293
console.log('iteraction:', d);
@@ -96,11 +97,13 @@ $(document).ready(function(){
9697
d['recipients_str'] = _.map(d['recipients'], function(r){
9798
return r.name;
9899
}).join(', ');
99-
return dict2html(d, ['subject', 'body', 'sender_str', 'recipients_str', 'date', 'message_id']);
100+
return dict2html(d, ['subject', 'body', 'hashtags', 'sender_str', 'recipients_str', 'date', 'message_id']);
100101
}
101102
},
102103
node: {
103-
fill: 'red',
104+
fill: function(d){
105+
return cluster_palette(d['cluster_label']);
106+
},
104107
r: 8,
105108
label: dataset_setting.node_label
106109
},
@@ -174,7 +177,6 @@ $(document).ready(function(){
174177
mc,
175178
url_dict
176179
);
177-
console.log('ret["force"]:', ret['force']);
178180
var charge_from_input = parseInt($("#charge").val());
179181
if(charge_from_input){
180182
ret.force.charge = charge_from_input;

html/js/util.js

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,10 @@ function init_dataset_and_paths_widget(paths_json_name){
4040
$('#dataset').on('change', function(){
4141
d3.json("data/" + $(this).val() + "/" + paths_json_name,
4242
function(error, result_paths) {
43+
if(error != null){
44+
console.log(error);
45+
}
46+
4347
$('#dataPathSelection').children().remove();
4448
_.each(result_paths.sort(), function(p, index){
4549
var opt;

html/js/viz.js

Lines changed: 6 additions & 148 deletions
Original file line number | Diff line number | Diff line change
@@ -91,10 +91,12 @@ function load_event_1(config){
9191
.on('mouseover', tip.show)
9292
.on('mouseout', tip.hide)
9393

94-
var node_labels = gnodes.append("text")
95-
.text(config.node.label)
96-
.attr('font-size', 10)
97-
.attr('font-weight', 'bold');
94+
if(false){
95+
var node_labels = gnodes.append("text")
96+
.text(config.node.label)
97+
.attr('font-size', 10)
98+
.attr('font-weight', 'bold');
99+
}
98100

99101
force.on("tick", function() {
100102
link.attr("x1", function(d) { return d.source.x; })
@@ -117,147 +119,3 @@ function load_event_1(config){
117119
});
118120
});
119121
}
120-
121-
122-
123-
// ######## DEPRECATED ########
124-
function load_event(data_path, kth){
125-
var width = 960,
126-
height = 1000;
127-
128-
var EDGE_BROADCAST = 1, EDGE_REPLY = 2, EDGE_RELAY = 3;
129-
var palette = d3.scale.ordinal()
130-
.domain([EDGE_BROADCAST, EDGE_REPLY, EDGE_RELAY])
131-
.range(d3.scale.category10().range());
132-
133-
var format_time = d3.time.format("%Y-%m-%d");
134-
135-
var force = d3.layout.force()
136-
.charge(-150)
137-
.linkDistance(500)
138-
.size([width, height]);
139-
140-
var svg = d3.select("body").append("svg")
141-
.attr("width", width)
142-
.attr("height", height);
143-
144-
svg.append("svg:defs")
145-
.append("svg:marker")
146-
.attr("id", "triangle")
147-
.attr("viewBox", "0 -5 10 10")
148-
.attr("refX", 15)
149-
.attr("refY", -1.5)
150-
.attr("markerWidth", 3)
151-
.attr("markerHeight", 3)
152-
.attr("orient", "auto")
153-
.append("svg:path")
154-
.attr("d", "M0,-5L10,0L0,5");
155-
156-
d3.json("data/id2interaction.json", function(error, id2interactions) {
157-
d3.json("data/id2people.json", function(error, id2people) {
158-
d3.json(data_path, function(error, graphs) {
159-
var tip = d3.tip()
160-
.attr('class', 'd3-tip')
161-
// .offset([100, 20])
162-
.html(function(n) {
163-
var i = id2interactions[n['message_id']];
164-
i['date'] = format_time(new Date(i['datetime']*1000));
165-
i['sender'] = id2people[i['sender_id']]['email'].replace("@enron.com", "");
166-
i['recipients'] = _.map(i['recipient_ids'], function(k){
167-
return id2people[k]['email'].replace("@enron.com", "");
168-
}).join(" ");
169-
console.log('iteraction:', i);
170-
return dict2html(i, ['subject', 'body', 'sender', 'recipients', 'date', 'message_id']);
171-
});
172-
173-
svg.call(tip);
174-
175-
var graph = graphs[kth];
176-
177-
if (error) throw error;
178-
179-
force
180-
.nodes(graph.nodes)
181-
.links(graph.edges)
182-
.start();
183-
184-
var link = svg.selectAll(".link")
185-
.data(graph.edges)
186-
.enter().append("line")
187-
.attr("class", "link")
188-
.attr("marker-end", "url(#triangle)")
189-
.attr("stroke", function(d){
190-
var s = d['source'], t = d['target'];
191-
if(s["sender_id"] == t["sender_id"]){
192-
return palette(EDGE_BROADCAST); // broadcast
193-
}
194-
else if(_.intersection(s["recipient_ids"], [t["sender_id"]]) &&
195-
_.intersection(t["recipient_ids"], [s["sender_id"]])){
196-
return palette(EDGE_REPLY); // reply
197-
}
198-
else if(_.intersection(s["recipient_ids"], [t["sender_id"]]) &&
199-
!_.intersection(t["recipient_ids"], [s["sender_id"]])){
200-
return palette(EDGE_RELAY); // relay
201-
}
202-
})
203-
.attr("stroke-width", function(d){
204-
if(d['event']){
205-
return 4;
206-
}else{
207-
return 1;
208-
}
209-
})
210-
.attr("opacity", function(d){
211-
if(d['event']){
212-
return 1;
213-
}else{
214-
return 0.5;
215-
}
216-
});
217-
218-
function mouseover_wrapper(d){
219-
tip.show(d);
220-
link.style('stroke-width', function(l) {
221-
if (d === l.source)
222-
return 2;
223-
else
224-
return 1;
225-
});
226-
}
227-
function mouseout_wrapper(d){
228-
tip.hide(d);
229-
link.style('stroke-width', 1);
230-
}
231-
var node = svg.selectAll(".node")
232-
.data(graph.nodes)
233-
.enter().append("circle")
234-
.attr("class", "node")
235-
.attr("r", 5)
236-
// .style("fill", palette(0))
237-
.style("fill", function(d){
238-
if(d['event']){
239-
return palette(0);
240-
}else{
241-
return '#eee';
242-
}
243-
})
244-
.call(force.drag)
245-
// .on('mouseover', tip.show)
246-
// .on('mouseout', tip.hide)
247-
.on('mouseover', mouseover_wrapper)
248-
.on('mouseout', mouseout_wrapper)
249-
250-
251-
force.on("tick", function() {
252-
link.attr("x1", function(d) { return d.source.x; })
253-
.attr("y1", function(d) { return d.source.y; })
254-
.attr("x2", function(d) { return d.target.x; })
255-
.attr("y2", function(d) { return d.target.y; });
256-
257-
node.attr("cx", function(d) { return d.x; })
258-
.attr("cy", function(d) { return d.y; });
259-
});
260-
});
261-
});
262-
});
263-
}

html/meta_graph_display.html

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
<body>
3434
<div>
3535
<select id="dataset">
36+
<option value="twitter">Twitter</option>
3637
<option value="enron">Enron</option>
3738
<option value="islamic">Islamic forum</option>
3839
<option value="sklearn">Sklearn(Github repository)</option>

html/timeline.html

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
<div>
1414
Dataset:
1515
<select id="dataset">
16+
<option value="twitter">Twitter</option>
1617
<option value="enron">Enron</option>
1718
<option value="islamic">Islamic forum</option>
1819
<option value="sklearn">Sklearn(Github repository)</option>

meta_graph_stat.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -178,8 +178,9 @@ def populate_user_info(counter):
178178
return Counter(data)
179179

180180
result['sender_count'] = Counter(
181-
[peopleid2info[
182-
id2interaction[self.g.node[n]['message_id']]['sender_id']]
181+
[peopleid2info.get(
182+
id2interaction[self.g.node[n]['message_id']]['sender_id'],
183+
'unknown')
183184
for n in self.g.nodes()]
184185
)
185186

util.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import math
44
import gensim
55
from datetime import datetime
6+
from collections import defaultdict
67

78

89
def load_items_by_line(path):
@@ -21,14 +22,17 @@ def load_id2obj_dict(path, id_key):
2122
interactions = json.load(open(path))
2223
except ValueError:
2324
interactions = load_json_by_line(path)
24-
return {i[id_key]: i
25-
for i in interactions}
25+
d = defaultdict(lambda: {'id': 'unknown', 'name': 'unknown'})
26+
for i in interactions:
27+
d[i[id_key]] = i
28+
return d
2629

2730

2831
def get_datetime(obj):
2932
if isinstance(obj, datetime):
3033
return obj
31-
elif (isinstance(obj, float) or isinstance(obj, int)) and not math.isnan(obj):
34+
elif (isinstance(obj, float) or
35+
isinstance(obj, int)) and not math.isnan(obj):
3236
return datetime.fromtimestamp(obj)
3337
elif isinstance(obj, long):
3438
return datetime.fromtimestamp(obj / 1000)

viz_util.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ def to_d3_graph(g):
55
data = {'nodes': [], 'edges': []}
66
for n in g.nodes_iter():
77
node = g.node[n]
8-
for f in ('topics', 'bow'):
8+
for f in ('topics', 'bow', 'hashtag_bow'):
99
if f in node:
1010
del node[f]
1111

0 commit comments

Comments
 (0)