| #! /usr/bin/env python |
| |
| """world -- Print mappings between country names and DNS country codes. |
| |
| Contact: Barry Warsaw |
| Email: barry@python.org |
| Version: %(__version__)s |
| |
| This script will take a list of Internet addresses and print out where in the |
| world those addresses originate from, based on the top-level domain country |
| code found in the address. Addresses can be in any of the following forms: |
| |
| xx -- just the country code or top-level domain identifier |
| host.domain.xx -- any Internet host or network name |
| somebody@where.xx -- an Internet email address |
| |
| If no match is found, the address is interpreted as a regular expression and a |
| reverse lookup is attempted. This script will search the country names and |
| print a list of matching entries. You can force reverse mappings with the |
| `-r' flag (see below). |
| |
| For example: |
| |
| %% world tz us |
| tz originated from Tanzania, United Republic of |
| us originated from United States |
| |
| %% world united |
| united matches 6 countries: |
| ae: United Arab Emirates |
| uk: United Kingdom (common practice) |
| um: United States Minor Outlying Islands |
| us: United States |
| tz: Tanzania, United Republic of |
| gb: United Kingdom |
| |
| Country codes are maintained by the RIPE Network Coordination Centre, |
| in coordination with the ISO 3166 Maintenance Agency at DIN Berlin. The |
| authoritative source of country code mappings is: |
| |
| <url:ftp://ftp.ripe.net/iso3166-countrycodes.txt> |
| |
| The latest known change to this information was: |
| |
| Friday, 5 April 2002, 12.00 CET 2002 |
| |
| This script also knows about non-geographic top-level domains, and the |
| additional ccTLDs reserved by IANA. |
| |
| Usage: %(PROGRAM)s [-d] [-p file] [-o] [-r] [-h] addr [addr ...] |
| |
| --dump |
| -d |
| Print mapping of all top-level domains. |
| |
| --parse file |
| -p file |
| Parse an iso3166-countrycodes file extracting the two letter country |
| code followed by the country name. Note that the three letter country |
| codes and numbers, which are also provided in the standard format |
| file, are ignored. |
| |
| --outputdict |
| -o |
| When used in conjunction with the `-p' option, output is in the form |
| of a Python dictionary, and country names are normalized |
| w.r.t. capitalization. This makes it appropriate for cutting and |
| pasting back into this file. Output is always to standard out. |
| |
| --reverse |
| -r |
| Force reverse lookup. In this mode the address can be any Python |
| regular expression; this is matched against all country names and a |
| list of matching mappings is printed. In normal mode (i.e. without |
| this flag), reverse lookup is performed on addresses if no matching |
| country code is found. |
| |
| -h |
| --help |
| Print this message. |
| """ |
# Substituted for the %(__version__)s placeholder of the module docstring
# when usage() prints the help text.  The value looks like an expanded
# version-control keyword -- TODO confirm it is still maintained.
__version__ = '$Revision: 27624 $'
| |
| |
| import sys |
| import getopt |
| import re |
| |
# Name the script was invoked as; substituted for the %(PROGRAM)s
# placeholder of the module docstring when usage() prints the help text.
PROGRAM = sys.argv[0]
| |
| |
| |
def usage(code, msg=''):
    """Print the help text and terminate the program.

    The help text is the module docstring with its ``%(name)s`` placeholders
    filled in from the module globals.

    code -- exit status handed to sys.exit()
    msg  -- optional extra message printed after the help text
    """
    help_text = __doc__ % globals()
    print(help_text)
    if msg:
        print(msg)
    sys.exit(code)
| |
| |
| |
def resolve(rawaddr):
    """Forward lookup: report where the top-level domain of *rawaddr* is.

    The text after the last dot of the address is looked up first in the
    non-geographic table (`nameorgs') and then in the country table
    (`countries').  On a hit one line is printed.

    rawaddr -- a bare code, a host name, or an email address

    Returns None when the address was resolved, otherwise *rawaddr* itself
    so that the caller can hand it to the next lookup stage.
    """
    # DNS names compare case-insensitively and a fully-qualified name may end
    # in a dot, so normalize both before picking the last label.
    addr = rawaddr.rstrip('.').split('.')[-1].lower()
    if not addr:
        # no top level domain found, bounce it to the next step
        return rawaddr
    if addr in nameorgs:
        print('%s is in the %s top level domain' % (rawaddr, nameorgs[addr]))
        return None
    if addr in countries:
        print('%s originated from %s' % (rawaddr, countries[addr]))
        return None
    # Not resolved, bounce it to the next step
    return rawaddr
| |
| |
| |
def reverse(regexp):
    """Reverse lookup: list every code whose name matches *regexp*.

    *regexp* is compiled case-insensitively and searched for in every value
    of the combined module-level table `all'.  Matches are printed in
    alphabetical order of their code.

    regexp -- a Python regular expression (or an address that failed the
              forward lookup)

    Returns None when at least one name matched, otherwise *regexp* itself
    so that the caller can report it as unresolved.
    """
    try:
        cre = re.compile(regexp, re.IGNORECASE)
    except re.error:
        # Addresses bounced here from the forward lookup are not necessarily
        # valid regular expressions; treat those as "no match" rather than
        # aborting the whole run with a traceback.
        return regexp
    # Sort so the output does not depend on dictionary iteration order.
    matches = sorted([code for code, country in all.items()
                      if cre.search(country)])
    if not matches:
        # not resolved, bounce it to the next step
        return regexp
    if len(matches) == 1:
        code = matches[0]
        print("%s matches code `%s', %s" % (regexp, code, all[code]))
    else:
        print('%s matches %d countries:' % (regexp, len(matches)))
        for code in matches:
            print(" %s: %s" % (code, all[code]))
    return None
| |
| |
| |
def _normalize_country(country):
    """Return the upper-case *country* name with display capitalization."""
    words = country.split()
    for i, w in enumerate(words):
        # XXX special cases
        if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
            words[i] = w.lower()
        elif w == 'THE' and i != 1:
            # A `THE' in second position (right after "NAME,") falls through
            # to the default branch and stays capitalized.
            words[i] = w.lower()
        elif len(w) > 3 and w[1] == "'":
            # Elided article such as D'IVOIRE: keep the letter after the
            # apostrophe in upper case.
            words[i] = w[0:3].upper() + w[3:].lower()
        elif w in ('(U.S.)', 'U.S.'):
            pass
        elif w[0] == '(' and w != '(local':
            words[i] = '(' + w[1:].capitalize()
        elif w.find('-') != -1:
            words[i] = '-'.join([s.capitalize() for s in w.split('-')])
        else:
            words[i] = w.capitalize()
    return ' '.join(words)


def parse(file, normalize):
    """Print the two-letter codes found in an iso3166-countrycodes file.

    Only the lines between the first line starting with `-' and the next
    such line are examined.  Each entry is expected to hold the country
    name, the two letter code, the three letter code and the number; the
    last two are ignored.  Lines in that region that do not fit are reported
    on standard output and skipped.

    file      -- path of the file to read
    normalize -- when true, print a Python dictionary literal with
                 lower-case codes and re-capitalized names; otherwise print
                 plain "CODE NAME" lines

    Returns None.  If the file cannot be opened a message is printed and
    nothing else happens.
    """
    try:
        fp = open(file)
    except IOError as e:
        print('%s : %s' % (e.strerror, file))
        # Nothing to read; without this return the loop below would fail on
        # the unbound file object.
        return

    cre = re.compile(r'(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
    scanning = 0

    if normalize:
        print('countries = {')

    try:
        for line in fp:
            if not scanning:
                # Skip the preamble up to the first dashed separator.
                if line[0] == '-':
                    scanning = 1
                continue
            mo = cre.match(line)
            if not mo:
                line = line.strip()
                if not line:
                    continue
                if line[0] == '-':
                    # Second dashed separator ends the table.
                    break
                print('Could not parse line: %s' % line)
                continue
            country, code = mo.group(1, 2)
            if normalize:
                print(' "%s": "%s",' % (code.lower(),
                                        _normalize_country(country)))
            else:
                print('%s %s' % (code, country))
    finally:
        fp.close()

    if normalize:
        print(' }')
| |
| |
def main():
    """Command-line entry point.

    Reads the options from sys.argv and runs exactly one mode: help, dump of
    both tables, parsing of a country-code file, or lookup of the addresses
    given as arguments.  In lookup mode every address first goes through the
    forward lookup (unless reverse lookup is forced), the leftovers go
    through the reverse lookup, and whatever is still unresolved is reported.
    """
    show_help = False
    dump = False
    parsefile = None
    normalize = False
    forcerev = False

    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'p:rohd',
            ['parse=', 'reverse', 'outputdict', 'help', 'dump'])
    except getopt.error as msg:
        usage(1, msg)

    for opt, arg in opts:
        if opt == '-h' or opt == '--help':
            show_help = True
        elif opt == '-d' or opt == '--dump':
            dump = True
        elif opt == '-p' or opt == '--parse':
            parsefile = arg
        elif opt == '-o' or opt == '--outputdict':
            normalize = True
        elif opt == '-r' or opt == '--reverse':
            forcerev = True

    if show_help:
        usage(0)

    if dump:
        print('Non-geographic domains:')
        for code in sorted(nameorgs):
            print(' %4s: %s' % (code, nameorgs[code]))

        print('\nCountry coded domains:')
        for code in sorted(countries):
            print(' %2s: %s' % (code, countries[code]))
        return

    if parsefile:
        parse(parsefile, normalize)
        return

    # Each stage hands back the addresses it could not deal with; a stage is
    # run over all of its input before the next one starts.
    pending = args
    if not forcerev:
        pending = [addr for addr in map(resolve, pending) if addr]
    pending = [addr for addr in map(reverse, pending) if addr]
    for addr in pending:
        print('Where in the world is %s?' % addr)
| |
| |
| |
| # The mappings |
# Non-geographic top level domains, including the new ones described by
# ICANN at http://www.icann.org/tlds/
_GENERIC_TLDS = {
    "aero": "air-transport industry",
    "arpa": "Arpanet",
    "biz": "business",
    "com": "commercial",
    "coop": "cooperatives",
    "edu": "educational",
    "gov": "government",
    "info": "unrestricted `info'",
    "int": "international",
    "mil": "military",
    "museum": "museums",
    "name": "`name' (for registration by individuals)",
    "net": "networking",
    "org": "non-commercial",
    "pro": "professionals",
}

# Additional ccTLDs that are not part of ISO 3166.  IANA has 5 reserved
# ccTLDs as described here:
#
# http://www.iso.org/iso/en/prods-services/iso3166ma/04background-on-iso-3166/iso3166-1-and-ccTLDs.html
#
# but no official list of them could be found.
#
# `uk' is the common practice country code for the United Kingdom; as far
# as can be told the official `gb' code is routinely ignored.
# <D.M.Pick@qmw.ac.uk> reports that `uk' was in use long before ISO 3166 was
# adopted for top-level DNS zone names (although in the reverse order, like
# uk.ac.qmw) and was carried forward (with the reversal) to avoid a
# large-scale renaming process as the UK switched from their old `Coloured
# Book' protocols over X.25 to Internet protocols over IP.
#
# See <url:ftp://ftp.ripe.net/ripe/docs/ripe-159.txt>
#
# `su' is obsolete but still in limited use.
_RESERVED_CCTLDS = {
    "ac": "Ascension Island",
    "gg": "Guernsey",
    "im": "Isle of Man",
    "je": "Jersey",
    "uk": "United Kingdom (common practice)",
    "su": "Soviet Union (still in limited use)",
}

# Everything that is looked up before the ISO 3166 country table.
nameorgs = dict(_GENERIC_TLDS)
nameorgs.update(_RESERVED_CCTLDS)
| |
| |
| |
# Two-letter country codes (lower case) mapped to country names, in the form
# produced by the `-p file -o' options of this script.  The one deliberate
# deviation from that output is "hm", where plain word capitalization would
# give the wrong spelling "Mcdonald".
countries = {
    "af": "Afghanistan",
    "al": "Albania",
    "dz": "Algeria",
    "as": "American Samoa",
    "ad": "Andorra",
    "ao": "Angola",
    "ai": "Anguilla",
    "aq": "Antarctica",
    "ag": "Antigua and Barbuda",
    "ar": "Argentina",
    "am": "Armenia",
    "aw": "Aruba",
    "au": "Australia",
    "at": "Austria",
    "az": "Azerbaijan",
    "bs": "Bahamas",
    "bh": "Bahrain",
    "bd": "Bangladesh",
    "bb": "Barbados",
    "by": "Belarus",
    "be": "Belgium",
    "bz": "Belize",
    "bj": "Benin",
    "bm": "Bermuda",
    "bt": "Bhutan",
    "bo": "Bolivia",
    "ba": "Bosnia and Herzegowina",
    "bw": "Botswana",
    "bv": "Bouvet Island",
    "br": "Brazil",
    "io": "British Indian Ocean Territory",
    "bn": "Brunei Darussalam",
    "bg": "Bulgaria",
    "bf": "Burkina Faso",
    "bi": "Burundi",
    "kh": "Cambodia",
    "cm": "Cameroon",
    "ca": "Canada",
    "cv": "Cape Verde",
    "ky": "Cayman Islands",
    "cf": "Central African Republic",
    "td": "Chad",
    "cl": "Chile",
    "cn": "China",
    "cx": "Christmas Island",
    "cc": "Cocos (Keeling) Islands",
    "co": "Colombia",
    "km": "Comoros",
    "cg": "Congo",
    "cd": "Congo, The Democratic Republic of the",
    "ck": "Cook Islands",
    "cr": "Costa Rica",
    "ci": "Cote D'Ivoire",
    "hr": "Croatia",
    "cu": "Cuba",
    "cy": "Cyprus",
    "cz": "Czech Republic",
    "dk": "Denmark",
    "dj": "Djibouti",
    "dm": "Dominica",
    "do": "Dominican Republic",
    "tp": "East Timor",
    "ec": "Ecuador",
    "eg": "Egypt",
    "sv": "El Salvador",
    "gq": "Equatorial Guinea",
    "er": "Eritrea",
    "ee": "Estonia",
    "et": "Ethiopia",
    "fk": "Falkland Islands (Malvinas)",
    "fo": "Faroe Islands",
    "fj": "Fiji",
    "fi": "Finland",
    "fr": "France",
    "gf": "French Guiana",
    "pf": "French Polynesia",
    "tf": "French Southern Territories",
    "ga": "Gabon",
    "gm": "Gambia",
    "ge": "Georgia",
    "de": "Germany",
    "gh": "Ghana",
    "gi": "Gibraltar",
    "gr": "Greece",
    "gl": "Greenland",
    "gd": "Grenada",
    "gp": "Guadeloupe",
    "gu": "Guam",
    "gt": "Guatemala",
    "gn": "Guinea",
    "gw": "Guinea-Bissau",
    "gy": "Guyana",
    "ht": "Haiti",
    "hm": "Heard Island and McDonald Islands",
    "va": "Holy See (Vatican City State)",
    "hn": "Honduras",
    "hk": "Hong Kong",
    "hu": "Hungary",
    "is": "Iceland",
    "in": "India",
    "id": "Indonesia",
    "ir": "Iran, Islamic Republic of",
    "iq": "Iraq",
    "ie": "Ireland",
    "il": "Israel",
    "it": "Italy",
    "jm": "Jamaica",
    "jp": "Japan",
    "jo": "Jordan",
    "kz": "Kazakstan",
    "ke": "Kenya",
    "ki": "Kiribati",
    "kp": "Korea, Democratic People's Republic of",
    "kr": "Korea, Republic of",
    "kw": "Kuwait",
    "kg": "Kyrgyzstan",
    "la": "Lao People's Democratic Republic",
    "lv": "Latvia",
    "lb": "Lebanon",
    "ls": "Lesotho",
    "lr": "Liberia",
    "ly": "Libyan Arab Jamahiriya",
    "li": "Liechtenstein",
    "lt": "Lithuania",
    "lu": "Luxembourg",
    "mo": "Macau",
    "mk": "Macedonia, The Former Yugoslav Republic of",
    "mg": "Madagascar",
    "mw": "Malawi",
    "my": "Malaysia",
    "mv": "Maldives",
    "ml": "Mali",
    "mt": "Malta",
    "mh": "Marshall Islands",
    "mq": "Martinique",
    "mr": "Mauritania",
    "mu": "Mauritius",
    "yt": "Mayotte",
    "mx": "Mexico",
    "fm": "Micronesia, Federated States of",
    "md": "Moldova, Republic of",
    "mc": "Monaco",
    "mn": "Mongolia",
    "ms": "Montserrat",
    "ma": "Morocco",
    "mz": "Mozambique",
    "mm": "Myanmar",
    "na": "Namibia",
    "nr": "Nauru",
    "np": "Nepal",
    "nl": "Netherlands",
    "an": "Netherlands Antilles",
    "nc": "New Caledonia",
    "nz": "New Zealand",
    "ni": "Nicaragua",
    "ne": "Niger",
    "ng": "Nigeria",
    "nu": "Niue",
    "nf": "Norfolk Island",
    "mp": "Northern Mariana Islands",
    "no": "Norway",
    "om": "Oman",
    "pk": "Pakistan",
    "pw": "Palau",
    "ps": "Palestinian Territory, Occupied",
    "pa": "Panama",
    "pg": "Papua New Guinea",
    "py": "Paraguay",
    "pe": "Peru",
    "ph": "Philippines",
    "pn": "Pitcairn",
    "pl": "Poland",
    "pt": "Portugal",
    "pr": "Puerto Rico",
    "qa": "Qatar",
    "re": "Reunion",
    "ro": "Romania",
    "ru": "Russian Federation",
    "rw": "Rwanda",
    "sh": "Saint Helena",
    "kn": "Saint Kitts and Nevis",
    "lc": "Saint Lucia",
    "pm": "Saint Pierre and Miquelon",
    "vc": "Saint Vincent and the Grenadines",
    "ws": "Samoa",
    "sm": "San Marino",
    "st": "Sao Tome and Principe",
    "sa": "Saudi Arabia",
    "sn": "Senegal",
    "sc": "Seychelles",
    "sl": "Sierra Leone",
    "sg": "Singapore",
    "sk": "Slovakia",
    "si": "Slovenia",
    "sb": "Solomon Islands",
    "so": "Somalia",
    "za": "South Africa",
    "gs": "South Georgia and the South Sandwich Islands",
    "es": "Spain",
    "lk": "Sri Lanka",
    "sd": "Sudan",
    "sr": "Suriname",
    "sj": "Svalbard and Jan Mayen",
    "sz": "Swaziland",
    "se": "Sweden",
    "ch": "Switzerland",
    "sy": "Syrian Arab Republic",
    "tw": "Taiwan, Province of China",
    "tj": "Tajikistan",
    "tz": "Tanzania, United Republic of",
    "th": "Thailand",
    "tg": "Togo",
    "tk": "Tokelau",
    "to": "Tonga",
    "tt": "Trinidad and Tobago",
    "tn": "Tunisia",
    "tr": "Turkey",
    "tm": "Turkmenistan",
    "tc": "Turks and Caicos Islands",
    "tv": "Tuvalu",
    "ug": "Uganda",
    "ua": "Ukraine",
    "ae": "United Arab Emirates",
    "gb": "United Kingdom",
    "us": "United States",
    "um": "United States Minor Outlying Islands",
    "uy": "Uruguay",
    "uz": "Uzbekistan",
    "vu": "Vanuatu",
    "ve": "Venezuela",
    "vn": "Viet Nam",
    "vg": "Virgin Islands, British",
    "vi": "Virgin Islands, U.S.",
    "wf": "Wallis and Futuna",
    "eh": "Western Sahara",
    "ye": "Yemen",
    "yu": "Yugoslavia",
    "zm": "Zambia",
    "zw": "Zimbabwe",
}
| |
# Combined table used by the reverse lookup: the non-geographic entries
# first, then the country codes.  NOTE: this module-level name hides the
# builtin all() for the rest of the module; reverse() refers to it by this
# name.
all = dict(nameorgs)
all.update(countries)


if __name__ == '__main__':
    main()