#!/usr/bin/env python
#
# Traffic-jam ("file") information from NU.nl on the CLI
#
# Licence: BSDLike - http://rickvanderzwet.nl/LICENSE
# Rick van der Zwet <info@rickvanderzwet.nl>
|
---|
| 7 | from bs4 import BeautifulSoup
|
---|
| 8 | import re
|
---|
| 9 | import sys
|
---|
| 10 | import urllib2
|
---|
| 11 |
|
---|
# Named trips ("rit"): each trip name maps to the list of roads that are
# looked up when that trip is requested on the command line.
rit = {
    'werk': [
        'N11',
        'A44',
    ],
}
|
---|
| 13 |
|
---|
def usage():
    """Print the command-line usage line and terminate the program.

    The usage text is written to standard output, after which the process
    exits with status 128.

    Raises:
        SystemExit: always, with exit code 128.
    """
    print("Gebruik: %s [rit|weg] <naam> [<naam2> ...]" % sys.argv[0])
    # sys.exit() rather than the bare exit(): the latter is a convenience
    # object injected by the `site` module and does not exist when the
    # interpreter runs without it (e.g. `python -S`).
    sys.exit(128)
|
---|
| 17 |
|
---|
# Work out which roads the output has to be restricted to.
#   <no arguments>     -> empty filter, every traffic jam is shown
#   rit <naam> [...]   -> the roads of the named trips, looked up in `rit`
#   weg <naam> [...]   -> the road names exactly as given
# Any other invocation prints the usage text and exits.
wegen_filter = []
if len(sys.argv) > 1:
    if len(sys.argv) == 2:
        # A mode keyword without at least one name is not a valid call.
        usage()
    elif sys.argv[1] == 'rit':
        for naam in sys.argv[2:]:
            # `in` replaces dict.has_key(), which is deprecated and was
            # removed in Python 3.
            if naam not in rit:
                # list(rit) yields the trip names as a plain list, so the
                # message looks the same on every Python version.
                print("Geen rit '{0}' bekend (opties: {1})".format(
                    naam, list(rit)))
                # sys.exit(): the bare exit() only exists when the `site`
                # module has been loaded.
                sys.exit(128)
            wegen_filter.extend(rit[naam])
    elif sys.argv[1] == 'weg':
        wegen_filter = sys.argv[2:]
    else:
        usage()

# Drop duplicate roads (trips may share a road) and fix the order so the
# summary line printed at the end is stable.
wegen_filter = sorted(set(wegen_filter))
|
---|
| 34 |
|
---|
# Fetch the traffic page from NU.nl, print the traffic jams found on it
# (restricted to `wegen_filter` when that is non-empty) and finish with a
# short summary.
FILE_URL = 'http://www.nu.nl/verkeer/index.html'

# BeautifulSoup consumes the whole response in its constructor, so the
# connection can be closed right after parsing instead of being leaked.
response = urllib2.urlopen(FILE_URL)
try:
    soup = BeautifulSoup(response)
finally:
    response.close()

middle = soup.find('div', {'class': 'page_content'})
if middle is None:
    # find() returns None when the container is absent; report that
    # explicitly instead of failing with an AttributeError below.
    sys.stderr.write(
        "Geen 'page_content' gevonden op {0}\n".format(FILE_URL))
    sys.exit(1)

file_len = 0
for f in middle.find_all('p', {'style': 'clear: both'}):
    # Flatten the paragraph to one line: newlines become '-' and runs of
    # spaces collapse to a single space.
    text = re.sub('[ ]+', ' ', f.get_text().strip().replace('\n', '-'))
    if len(list(f.children)) != 8:
        # Paragraphs without exactly 8 child nodes are not counted as a
        # traffic jam; they are echoed as a '#' comment line instead.
        # The unicode template avoids a UnicodeEncodeError on Python 2
        # when the page text contains non-ASCII characters.
        print(u"# {0}\n".format(text))
        continue
    # The total counts every traffic jam on the page, filtered or not.
    file_len += 1
    if wegen_filter:
        # Lower-case the line once, not once per road.
        haystack = text.lower()
        if not any('file: {0} '.format(weg.lower()) in haystack
                   for weg in wegen_filter):
            continue
    print(text)

print("")
if wegen_filter:
    print("# Gefiltered op wegen: {0}".format(", ".join(wegen_filter)))
print("# Data van: {0}".format(FILE_URL))
print("# Totaal {0} files".format(file_len))
|
---|
| 54 |
|
---|