dictionaries/sample.combined - platform/packages/inputmethods/LatinIME - Git at Google

 # This is a sample wordlist that can be converted to a binary dictionary
 # for use by the Latin IME.
 # The file is essentially a CSV file, with indent level denoting nesting.
 #
 # The file starts with a single CSV line with the header attributes. Whatever
 # the content, these are included as is in the binary file. The first attribute
 # of the file should be `dictionary'. Usual fields are `locale', `description',
 # `date', `version', `options'.
 #
 # Each word has a `word' entry and at least an `f' argument denoting its
 # probability, as an integer between 0 and 255 on a logarithmic scale, with
 # 255 meaning 1 and each decrement of 1 dividing the probability by 1.15.
 # As a special case, a weight of 0 is taken to mean profanity - words that
 # should not be considered a typo, but that should never be suggested
 # explicitly. An entry may be made not a word by adding a `not_a_word'
 # field with a value of `true'. The main reason for putting such entries
 # into the dictionary is to add shortcut targets and maybe a whitelist
 # replacement.
 #
 # Each word may have any number of shortcut target lines, each
 # starting with a `shortcut' entry and having at least an `f' frequency
 # value between 0 and 14, or the special value `whitelist' which becomes
 # 15, which is then taken to be the whitelist target of this word.
 #
 # Each word may also have any number of bigram lines starting with a
 # `bigram' entry containing the following word whose frequency should
 # override the unigram frequency when following the word this bigram is
 # for.
 #
 dictionary=main:en,locale=en,description=Sample wordlist,date=1351495318,version=1
  word=sample,f=200
   bigram=wordlist,f=243
  word=wordlist,f=180
  word=shortcut,f=176
   shortcut=target,f=10
  word=witelisted,f=10,not_a_word=true
   shortcut=whitelisted,f=whitelist
  word=profanity,f=0
	# This is a sample wordlist that can be converted to a binary dictionary
	# for use by the Latin IME.
	# The file is essentially a CSV file, with indent level denoting nesting.
	#
	# The file starts with a single CSV line with the header attributes. Whatever
	# the content, these are included as is in the binary file. The first attribute
	# of the file should be `dictionary'. Usual fields are `locale', `description',
	# `date', `version', `options'.
	#
	# Each word has a `word' entry and at least an `f' argument denoting its
	# probability, as an integer between 0 and 255 on a logarithmic scale, with
	# 255 meaning 1 and each decrement of 1 dividing the probability by 1.15.
	# As a special case, a weight of 0 is taken to mean profanity - words that
	# should not be considered a typo, but that should never be suggested
	# explicitly. An entry may be made not a word by adding a `not_a_word'
	# field with a value of `true'. The main reason for putting such entries
	# into the dictionary is to add shortcut targets and maybe a whitelist
	# replacement.
	#
	# Each word may have any number of shortcut target lines, each
	# starting with a `shortcut' entry and having at least an `f' frequency
	# value between 0 and 14, or the special value `whitelist' which becomes
	# 15, which is then taken to be the whitelist target of this word.
	#
	# Each word may also have any number of bigram lines starting with a
	# `bigram' entry containing the following word whose frequency should
	# override the unigram frequency when following the word this bigram is
	# for.
	#
	dictionary=main:en,locale=en,description=Sample wordlist,date=1351495318,version=1
	 word=sample,f=200
	  bigram=wordlist,f=243
	 word=wordlist,f=180
	 word=shortcut,f=176
	  shortcut=target,f=10
	 word=witelisted,f=10,not_a_word=true
	  shortcut=whitelisted,f=whitelist
	 word=profanity,f=0