#!/bin/sh
######################################################################
# Quick and dirty diskprices.org parser/converter from HTML to CSV.
######################################################################

# Print a one-line usage synopsis and terminate the script.
# Arguments: none (reads ${0} for the program name).
# Outputs:   the synopsis on stderr, so that it can never end up inside
#            CSV output that the caller redirected from stdout.
# Returns:   does not return; exits with status 1.
_usage() {
  printf 'Usage: %s [html_file]\n' "${0}" >&2
  exit 1
}

# Reduce an HTML document to one text token per line.
# Input:  HTML on stdin.
# Output: on stdout, one line per kept input line.
#
# grep keeps only lines containing "<td", "<th", "<tr" or "<a".
# The sed expressions are then applied in order to each kept line:
#   1. a line containing "<tr" is replaced by the single marker "%"
#   2. <td ATTRS><a ...>TEXT</a></td>   -> TEXT
#   3. <td>TEXT<...                     -> TEXT
#   4. <td class=...>TEXT<...           -> TEXT
#   5. everything up to and including <td class="name"> is removed
#   6. <th...>TEXT<...                  -> TEXT
#   7. <a...>TEXT</a...                 -> TEXT
# All patterns are greedy and line-based; a line that matches none of
# them is passed through unchanged.
_parser() {
  grep -E -e '<t(d|h|r)' -e '<a' \
    | sed -E -e 's:.*<tr.*:%:' \
      -e 's:.*<td .*><a.*>(.*)</a></td>.*:\1:' \
      -e 's:^.*<td>(.*)<.*$:\1:' \
      -e 's:^.*<td class=.*>(.*)<.*$:\1:' \
      -e 's:^.*<td class="name">::' \
      -e 's:^.*<th.*>(.*)<.*$:\1:' \
      -e 's:^.*<a.*>(.*)</a.*$:\1:'
}

# Join a stream of one-token-per-line text into ';'-separated records.
# Input:  lines on stdin; a line that is exactly "%" is a record marker.
# Output: on stdout, one line per record, fields joined with ';'.
#
# At every marker the record collected so far is printed (this yields
# an empty line when nothing was collected) and a new record is started.
# The record still pending at end of input is flushed too; without that
# the last row would be silently lost.
# The marker must be the whole line, so a cell such as "50% off" is
# treated as data rather than as a record boundary.
# NOTE(review): an empty first cell cannot be told apart from "no cell
# yet", so leading empty cells are dropped; kept as-is so that existing
# column positions do not shift.
_to_csv() {
  awk '
    BEGIN { row = "" }

    # Record marker: emit what was collected and start over.
    $0 == "%" {
      print row
      row = ""
      next
    }

    # Any other line is one field of the current record.
    {
      row = (row == "") ? $0 : (row ";" $0)
    }

    # Flush the final record, which has no marker after it.
    END {
      if (row != "") {
        print row
      }
    }
  '
}

# Drop known non-data lines and strip HTML entities.
# Input:  ';'-separated records on stdin.
# Output: the filtered records on stdout.
#
# Deleted: lines starting with "US;UK;DE;CA;ES" (this single prefix
# covers every longer country-list variant), lines starting with
# "Libera IRC", "Mailing list", "Learn more", "but how" or "webchat",
# and empty lines.
# Finally every "&...;" entity (letters, digits, '#') is removed, not
# decoded. Empty lines are deleted before the entity strip, so a line
# holding only an entity is still emitted as an empty line.
_csv_cleanup() {
  sed -E \
    -e '/^US;UK;DE;CA;ES/d' \
    -e '/^Libera IRC/d' \
    -e '/^Mailing list/d' \
    -e '/^Learn more/d' \
    -e '/^but how/d' \
    -e '/^$/d' \
    -e '/^webchat/d' \
    -e 's/&[A-Za-z0-9#]+;//g'
}

# Convert one HTML file to ';'-separated text on stdout.
# Arguments: $1 - path of the HTML file to read.
# Outputs:   result of _parser | _to_csv | _csv_cleanup on stdout.
#
# The file is attached with a quoted redirection, so paths containing
# spaces or glob characters, or starting with '-', are read as given.
_main() {
  _parser < "${1}" | _to_csv | _csv_cleanup
}

# Entry point: require one argument naming a readable non-directory,
# otherwise print the usage text and exit; then convert that file.
if [ -z "${1}" ] || [ ! -r "${1}" ] || [ -d "${1}" ]; then
  _usage
fi

_main "${1}"