1 | #!/usr/bin/env python
|
---|
2 | #
|
---|
3 | # File informatie van NU.nl op de CLI
|
---|
4 | #
|
---|
5 | # Licence: BSDLike - http://rickvanderzwet.nl/LICENSE
|
---|
6 | # Rick van der Zwet <info@rickvanderzwet.nl>
|
---|
7 | from bs4 import BeautifulSoup
|
---|
8 | import re
|
---|
9 | import sys
|
---|
10 | import urllib2
|
---|
11 |
|
---|
# Named trips ("rit"): each trip name maps to the list of road names that
# is used as the road filter when that trip is selected on the command line.
rit = {
    'werk': ['N11', 'A44', 'A4'],
}
13 |
|
---|
def usage():
    """Print the command-line synopsis and terminate the program.

    The synopsis is written to standard output, prefixed with the name the
    script was invoked as (``sys.argv[0]``).

    Raises:
        SystemExit: always, with exit status 128.
    """
    # A single-argument print(...) behaves identically as a Python 2 print
    # statement and as the Python 3 print function.
    print("Gebruik: %s [rit|weg] <naam> [<naam2> ...]" % sys.argv[0])
    # Use sys.exit rather than the bare exit(): the latter is a convenience
    # injected by the `site` module for interactive sessions and is not
    # guaranteed to exist (e.g. `python -S`, frozen executables).
    sys.exit(128)
17 |
|
---|
# Work out which roads the output has to be filtered on.
#
# Supported invocations:
#   <script>                       no filter (wegen_filter stays empty)
#   <script> rit <naam> [...]      roads of the named trips taken from `rit`
#   <script> weg <naam> [...]      the road names given on the command line
# Anything else, or a mode without at least one name, prints the usage text
# and exits.
wegen_filter = []
if len(sys.argv) > 1:
    if len(sys.argv) == 2:
        # A mode was given but no name followed it.
        usage()
    elif sys.argv[1] == 'rit':
        for naam in sys.argv[2:]:
            # `in` replaces dict.has_key(), which is deprecated in Python 2
            # and removed in Python 3.
            if naam not in rit:
                # sorted(rit) yields a plain, deterministically ordered list
                # of the known trip names on both Python 2 and Python 3.
                print("Geen rit '{0}' bekend (opties: {1})".format(naam, sorted(rit)))
                # sys.exit instead of the site-provided exit() helper, which
                # is not guaranteed to be available.
                sys.exit(128)
            wegen_filter.extend(rit[naam])
    elif sys.argv[1] == 'weg':
        wegen_filter = sys.argv[2:]
    else:
        usage()
# Drop duplicate road names and put them in a stable, sorted order.
wegen_filter = sorted(set(wegen_filter))
34 |
|
---|
# Fetch the traffic page from NU.nl and print the traffic jams found on it,
# optionally restricted to the roads in `wegen_filter`.
FILE_URL = 'http://www.nu.nl/verkeer/index.html'

# urllib2 responses are not context managers, so close the connection with
# try/finally once BeautifulSoup has consumed the markup.
response = urllib2.urlopen(FILE_URL)
try:
    # NOTE(review): no parser is named, so bs4 uses whichever one is
    # installed; consider pinning one after confirming that the child-count
    # check below still holds with it.
    soup = BeautifulSoup(response)
finally:
    response.close()

middle = soup.find('div', { 'class' : 'page_content' })
if middle is None:
    # Without the content container there is nothing to list; fail with a
    # clear message instead of an AttributeError on None further down.
    sys.stderr.write("Geen 'page_content' gevonden op {0}\n".format(FILE_URL))
    sys.exit(1)

# Loop invariants: the whitespace-collapsing pattern and the lower-cased
# search strings for every road in the filter.
spaces = re.compile('[ ]+')
needles = ['file: {0} '.format(weg.lower()) for weg in wegen_filter]

# Counts every jam entry on the page, including the ones hidden by the filter.
file_len = 0
for f in middle.find_all('p', { 'style' : 'clear: both' }):
    # Flatten the paragraph to one line: newlines become '-', and runs of
    # spaces collapse to a single space.
    text = spaces.sub(' ', f.get_text().strip().replace('\n', '-'))
    if len(list(f.children)) != 8:
        # Paragraphs without exactly eight child nodes are echoed as a
        # comment line and not counted. Presumably these are notices rather
        # than jam entries -- confirm against the page markup.
        print("# {0}\n".format(text))
        continue
    file_len += 1
    if needles:
        lowered = text.lower()
        # Skip the entry unless it mentions one of the requested roads.
        if not any(needle in lowered for needle in needles):
            continue
    print(text)

# Summary footer.
print("")
if wegen_filter:
    print("# Gefiltered op wegen: {0}".format(", ".join(wegen_filter)))
print("# Data van: {0}".format(FILE_URL))
print("# Totaal {0} files".format(file_len))
54 |
|
---|