Download this jupyter notebook for an interactive experience.
Python Environment
For SSH tunneling: ssh -i (private key file) -L 4367:localhost:4367 ec2-user@111.222.333.444
Start by importing the xgt module and creating a connection object.
import xgt
conn = xgt.Connection()
Are we running and connected via SSH tunneling?
conn.server_version
'0.17.0'
Define vertex and Netflow edge schema
# Remove frames left over from an earlier run so they can be recreated below.
# The two edge frames are dropped before the 'Devices' vertex frame
# (presumably because edge frames depend on their vertex frame -- confirm
# against the xgt documentation).
for stale_frame in ('Netflow', 'Wls1v', 'Devices'):
    conn.drop_frame(stale_frame)

# Vertex frame 'Devices': a single TEXT column, 'Device', which is also the key.
dev = conn.create_vertex_frame(
    name='Devices',
    schema=[['Device', xgt.TEXT]],
    key='Device',
)
# Column layout of the Netflow edge frame: two TEXT device columns, the rest INT.
netflow_schema = [
    ['epochtime', xgt.INT],
    ['duration', xgt.INT],
    ['srcDevice', xgt.TEXT],
    ['dstDevice', xgt.TEXT],
    ['protocol', xgt.INT],
    ['srcPort', xgt.INT],
    ['dstPort', xgt.INT],
    ['srcPackets', xgt.INT],
    ['dstPackets', xgt.INT],
    ['srcBytes', xgt.INT],
    ['dstBytes', xgt.INT],
]

# Edge frame 'Netflow': both endpoints live in the 'Devices' vertex frame;
# srcDevice names the source vertex and dstDevice names the target vertex.
nf = conn.create_edge_frame(
    name='Netflow',
    schema=netflow_schema,
    source='Devices',
    target='Devices',
    source_key='srcDevice',
    target_key='dstDevice',
)
Another edge type schema
# Column layout of the Wls1v edge frame.
wls1v_schema = [
    ['epochtime', xgt.INT],
    ['eventID', xgt.INT],
    ['logHost', xgt.TEXT],
    ['userName', xgt.TEXT],
    ['domainName', xgt.TEXT],
    ['logonID', xgt.INT],
    ['processName', xgt.TEXT],
    ['processID', xgt.INT],
    ['parentProcessName', xgt.TEXT],
    ['parentProcessID', xgt.INT],
]

# Edge frame 'Wls1v': source and target are both keyed on logHost, so every
# edge starts and ends on the same 'Devices' vertex (a self-loop).
wls1v = conn.create_edge_frame(
    name='Wls1v',
    schema=wls1v_schema,
    source='Devices',
    target='Devices',
    source_key='logHost',
    target_key='logHost',
)
Load server log data
import time
# Time the load of the Wls1v edge data.
start_time = time.time()
# range(4, 5) yields only 4, so a single file (day 04) is loaded;
# widen the range to load more days.
urls = [
    "http://datasets.trovares.com/LANL/xgt/wls_day-%02d_1v.csv" % day
    for day in range(4, 5)
]
wls1v.load(urls)
end_time = time.time()
print("Wsl load time: %.3f seconds" % (end_time - start_time))
Wsl load time: 26.505 seconds
Load netflow data
import time
# Time the load of the Netflow edge data.
start_time = time.time()
# range(4, 5) yields only 4, so a single file (day 04) is loaded;
# widen the range to load more days.
urls = [
    "http://datasets.trovares.com/LANL/xgt/nf_day-%02d.csv" % day
    for day in range(4, 5)
]
nf.load(urls)
end_time = time.time()
print("Netflow load time: %.3f seconds" % (end_time - start_time))
Netflow load time: 254.100 seconds
How much data did we load?
# Read the size of each frame: vertices for Devices, edges for the two edge frames.
num_devices = dev.num_vertices
num_netflow = nf.num_edges
num_wls_1v = wls1v.num_edges

# Print each count with thousands separators.
print("Rows of Devices data: " + format(num_devices, ","))
print("Rows of Netflow data: " + format(num_netflow, ","))
print("Rows of Wls 1V data: " + format(num_wls_1v, ","))
Rows of Devices data: 157,949
Rows of Netflow data: 222,323,503
Rows of Wls 1V data: 16,402,438
Graph Pattern of interest
Search for just a boot event
# Count Wls1v edges that loop from a device back to itself and carry
# eventID 4608 (the boot event this section searches for). The single
# COUNT(*) result is written into the 'answers' frame.
q = """
MATCH (dev1)-[boot:Wls1v]->(dev1)
WHERE boot.eventID = 4608
RETURN COUNT(*)
INTO answers
"""
def run_query_count(query):
    """Run a counting query, print how long it took, and return the count.

    The query is expected to write its result into a frame named 'answers'
    (the queries in this notebook end with ``RETURN COUNT(*) INTO answers``).
    Any existing 'answers' frame is dropped before the query runs.

    Parameters:
        query: query text handed to ``conn.run_job``.

    Returns:
        The first column of the first row of the 'answers' table frame.
    """
    # Drop the result frame of any previous query before running this one.
    conn.drop_frame('answers')

    # Only the run_job call is timed, not the result retrieval.
    started = time.time()
    conn.run_job(query)
    elapsed = time.time() - started
    print("Query time: %.3f seconds" % elapsed)

    # The count is read from the first cell of the result table.
    rows = conn.get_table_frame('answers').get_data()
    return rows[0][0]
# Run the boot-event query and print the count with thousands separators.
count = run_query_count(q)
print('Number of boot events: ' + format(count, ","))
Query time: 0.329 seconds
Number of boot events: 1,712
Boot event followed by Program Start event
# Extend the boot pattern with a second Wls1v self-loop on the same device:
# a program-start event (eventID 4688) logged at or after the boot event and
# fewer than 4 epochtime units after it (presumably seconds -- confirm
# against the data set).
q = """
MATCH (dev1)-[boot:Wls1v]->(dev1)-[ps:Wls1v]->(dev1)
WHERE boot.eventID = 4608
AND ps.eventID = 4688
AND ps.epochtime >= boot.epochtime
AND ps.epochtime - boot.epochtime < 4
RETURN COUNT(*)
INTO answers
"""
# Run the query and print the count with thousands separators.
count = run_query_count(q)
print('Number of boot==>programstart events: ' + "{:,}".format(count))
Query time: 0.921 seconds
Number of boot==>programstart events: 253,009
Boot, PS, and C2-connect
# Boot -> program start -> network connection:
# adds an edge nf0 from a different device (dev2) into the booting device
# (dev1) with srcPort 3128. nf0 must start at or after the program-start
# event and no more than 3 epochtime units after the boot event.
# NOTE(review): nf0 has no edge label in the pattern; it is presumably a
# Netflow edge, since srcPort appears only in the Netflow schema -- confirm.
q = """
MATCH (dev2)-[nf0]->(dev1)-[boot:Wls1v]->(dev1)-[ps:Wls1v]->(dev1)
WHERE dev1 <> dev2
AND nf0.srcPort = 3128
AND boot.eventID = 4608
AND ps.eventID = 4688
AND ps.epochtime >= boot.epochtime
AND nf0.epochtime >= ps.epochtime
AND nf0.epochtime - boot.epochtime <= 3
RETURN COUNT(*)
INTO answers
"""
# Note the overall time limit on the sequence of the three events
# (boot <= program start <= nf0, with nf0 at most 3 units after boot).
count = run_query_count(q)
print('Number of boot==>programstart==>nf0 events: ' + "{:,}".format(count))
Query time: 4.164 seconds
Number of boot==>programstart==>nf0 events: 109
Zombie Reboot
# Zombie reboot: the boot -> program start -> nf0 pattern, plus a second edge
# nf1 from dev2 to a third device dev3. All three devices must be distinct.
# nf1 must last at least 3600 epochtime units, start before nf0, and still be
# in progress when nf0 starts (nf1.epochtime + nf1.duration >= nf0.epochtime).
# NOTE(review): nf0 and nf1 have no edge label; they are presumably Netflow
# edges, since srcPort and duration appear only in the Netflow schema.
q = """
MATCH (dev2)-[nf0]->(dev1)-[boot:Wls1v]->(dev1)-[ps:Wls1v]->(dev1),
(dev2)-[nf1]->(dev3)
WHERE dev1 <> dev2 AND dev2 <> dev3 AND dev1 <> dev3
AND nf0.srcPort = 3128
AND boot.eventID = 4608
AND ps.eventID = 4688
AND ps.epochtime >= boot.epochtime
AND nf0.epochtime >= ps.epochtime
AND nf0.epochtime - boot.epochtime <= 3
AND nf1.duration >= 3600
AND nf1.epochtime < nf0.epochtime
AND nf1.epochtime + nf1.duration >= nf0.epochtime
RETURN COUNT(*)
INTO answers
"""
# Run the query and print the count with thousands separators.
count = run_query_count(q)
print('Number of Zombie Reboot events: ' + "{:,}".format(count))
Query time: 16.352 seconds
Number of Zombie Reboot events: 981