tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java - platform/packages/inputmethods/LatinIME - Git at Google

 /**
  * Copyright (C) 2012 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  * use this file except in compliance with the License. You may obtain a copy of
  * the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  * License for the specific language governing permissions and limitations under
  * the License.
  */

 package com.android.inputmethod.latin.dicttool;

 import com.android.inputmethod.latin.makedict.FusionDictionary;
 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
 import com.android.inputmethod.latin.makedict.WeightedString;
 import com.android.inputmethod.latin.makedict.WordProperty;

 import java.util.Arrays;
 import java.util.ArrayList;
 import java.util.HashMap;

 /**
  * The "diff" command of the dictionary tool: prints the differences between two dictionaries.
  *
  * The header attributes are compared first, then the words. With the -p (plumbing) option only
  * the word-level comparison is run and the section labels are omitted.
  */
 public class Diff extends Dicttool.Command {
     public static final String COMMAND = "diff";

     public Diff() {
     }

     /**
      * Returns the usage text for this command.
      */
     @Override
     public String getHelp() {
         final String usage =
                 COMMAND + " [-p] <dict> <dict> : shows differences between two dictionaries.\n";
         final String plumbingNote =
                 "  If -p (plumbing) option is given, produce output suitable for a script";
         return usage + plumbingNote;
     }

     /**
      * Reads the two dictionaries named on the command line and prints their differences.
      *
      * @throws RuntimeException if the argument count is wrong or a dictionary can't be read.
      */
     @Override
     public void run() {
         if (mArgs.length < 2) {
             throw new RuntimeException("Not enough arguments for command " + COMMAND);
         }
         final boolean plumbing = "-p".equals(mArgs[0]);
         if (plumbing) {
             // Drop the flag itself; exactly the two dictionary names must remain.
             mArgs = Arrays.copyOfRange(mArgs, 1, mArgs.length);
             if (2 != mArgs.length) {
                 throw new RuntimeException("Wrong number of arguments for command " + COMMAND);
             }
         }
         final FusionDictionary dict0 = readDictionaryOrThrow(mArgs[0]);
         final FusionDictionary dict1 = readDictionaryOrThrow(mArgs[1]);
         if (plumbing) {
             // Plumbing output : words only, without section labels or header comparison.
             diffWords(dict0, dict1);
             return;
         }
         System.out.println("Header :");
         diffHeaders(dict0, dict1);
         if (languageDiffers(dict0, dict1)) {
             // Only the language part of the locale is tested. Comparing en_US against en_GB
             // is meaningful, but whoever compares a de dictionary against a pt_BR one almost
             // certainly only wants the header-level diff: the word-level diff would be huge,
             // meaningless and annoying.
             return;
         }
         System.out.println("Body :");
         diffWords(dict0, dict1);
     }

     /**
      * Reads the dictionary with the given name, failing loudly when it can't be read.
      */
     private static FusionDictionary readDictionaryOrThrow(final String name) {
         final FusionDictionary dictionary =
                 BinaryDictOffdeviceUtils.getDictionary(name, false /* report */);
         if (null == dictionary) {
             throw new RuntimeException("Can't read dictionary " + name);
         }
         return dictionary;
     }

     /**
      * Tells whether the two dictionaries declare locales with different language parts.
      *
      * A dictionary without a "locale" attribute is never considered to differ.
      */
     private static boolean languageDiffers(final FusionDictionary dict0,
             final FusionDictionary dict1) {
         final String locale0 = dict0.mOptions.mAttributes.get("locale");
         if (null == locale0) {
             return false;
         }
         final String locale1 = dict1.mOptions.mAttributes.get("locale");
         if (null == locale1) {
             return false;
         }
         // The language is whatever comes before the first underscore of the locale.
         final String language0 = locale0.split("_", 3)[0];
         final String language1 = locale1.split("_", 3)[0];
         return !language0.equals(language1);
     }

     /**
      * Prints every header attribute whose value is not the same in both dictionaries, or a
      * "No differences" line when all of them match.
      */
     private static void diffHeaders(final FusionDictionary dict0, final FusionDictionary dict1) {
         // Starts as a copy of dict1's attributes; each key also present in dict0 is taken out,
         // so that what remains at the end exists in dict1 only.
         final HashMap<String, String> onlyInDict1 = new HashMap<>(dict1.mOptions.mAttributes);
         boolean differs = false;
         for (final String key : dict0.mOptions.mAttributes.keySet()) {
             final String value0 = dict0.mOptions.mAttributes.get(key);
             final String value1 = dict1.mOptions.mAttributes.get(key);
             if (!value0.equals(value1)) {
                 System.out.println("  " + key + " : " + value0 + " <=> " + value1);
                 differs = true;
             }
             onlyInDict1.remove(key);
         }
         for (final String key : onlyInDict1.keySet()) {
             System.out.println("  " + key + " : null <=> " + onlyInDict1.get(key));
             differs = true;
         }
         if (differs) {
             return;
         }
         System.out.println("  No differences");
     }

     /**
      * Prints the word-level differences between the two dictionaries : words deleted from
      * dict0, changes to words present in both, then words added in dict1. Prints a
      * "No differences" line when nothing was reported.
      */
     private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
         boolean differs = false;
         for (final WordProperty before : dict0) {
             final PtNode after =
                     FusionDictionary.findWordInTree(dict1.mRootNodeArray, before.mWord);
             if (null == after) {
                 // dict1 does not have this word
                 System.out.println("Deleted: " + before.mWord + " " + before.getProbability());
                 differs = true;
                 continue;
             }
             if (printChangesForSharedWord(before, after)) {
                 differs = true;
             }
         }
         for (final WordProperty after : dict1) {
             if (null != FusionDictionary.findWordInTree(dict0.mRootNodeArray, after.mWord)) {
                 continue;
             }
             // dict0 does not have this word
             System.out.println("Added: " + after.mWord + " " + after.getProbability());
             differs = true;
         }
         if (!differs) {
             System.out.println("  No differences");
         }
     }

     /**
      * Compares a word of the first dictionary with the node found for it in the second one and
      * prints what changed : probability, not-a-word flag, possibly-offensive flag and bigrams.
      *
      * @return true if at least one difference was printed.
      */
     private static boolean printChangesForSharedWord(final WordProperty before,
             final PtNode after) {
         boolean changed = false;
         if (before.getProbability() != after.getProbability()) {
             System.out.println("Probability changed: " + before.mWord + " "
                     + before.getProbability() + " -> " + after.getProbability());
             changed = true;
         }
         if (before.mIsNotAWord != after.getIsNotAWord()) {
             System.out.println("Not a word: " + before.mWord + " "
                     + before.mIsNotAWord + " -> " + after.getIsNotAWord());
             changed = true;
         }
         if (before.mIsPossiblyOffensive != after.getIsPossiblyOffensive()) {
             System.out.println("Possibly-offensive: " + before.mWord + " "
                     + before.mIsPossiblyOffensive + " -> " + after.getIsPossiblyOffensive());
             changed = true;
         }
         // Always run the bigram comparison, whatever was found above.
         if (hasAttributesDifferencesAndPrintThemIfAny(before.mWord, "Bigram",
                 before.getBigrams(), after.getBigrams())) {
             changed = true;
         }
         return changed;
     }

     /**
      * Prints the attributes of a word that were removed, added, or whose frequency changed.
      *
      * Entries of list1 that are matched against list0 are removed from list1 along the way, so
      * list1 is modified by this call.
      *
      * @param word the word the attributes belong to.
      * @param type the label to print in front of each reported difference.
      * @param list0 the attributes in the first dictionary, possibly null.
      * @param list1 the attributes in the second dictionary, possibly null.
      * @return true if the lists differ.
      */
     private static boolean hasAttributesDifferencesAndPrintThemIfAny(final String word,
             final String type, final ArrayList<WeightedString> list0,
             final ArrayList<WeightedString> list1) {
         if (null == list1) {
             if (null == list0) {
                 return false;
             }
             // Nothing on the other side : everything in list0 went away.
             for (final WeightedString gone : list0) {
                 printRemovedAttribute(type, word, gone);
             }
             return true;
         }
         boolean differs = false;
         if (null != list0) {
             for (final WeightedString attribute0 : list0) {
                 // remove() tests with #equals(). The WeightedString#equals() method returns
                 // true if both the string and the frequency are the same, so a successful
                 // removal means this attribute is unchanged.
                 if (list1.remove(attribute0)) {
                     continue;
                 }
                 differs = true;
                 // Look for the same string with a different frequency
                 final WeightedString sameString = findAttributeWithSameString(list1, attribute0);
                 if (null == sameString) {
                     printRemovedAttribute(type, word, attribute0);
                 } else {
                     System.out.println(type + " freq changed: " + word + " "
                             + attribute0.mWord + " " + attribute0.getProbability() + " -> "
                             + sameString.getProbability());
                     list1.remove(sameString);
                 }
             }
         }
         // Every matched entry was taken out above, so anything left in list1 is new.
         if (!list1.isEmpty()) {
             differs = true;
         }
         for (final WeightedString added : list1) {
             System.out.println(type + " added: " + word + " " + added.mWord + " "
                     + added.getProbability());
         }
         return differs;
     }

     /**
      * Returns the first element of the list holding the same string as the target, or null if
      * there is none.
      */
     private static WeightedString findAttributeWithSameString(
             final ArrayList<WeightedString> candidates, final WeightedString target) {
         for (final WeightedString candidate : candidates) {
             if (target.mWord.equals(candidate.mWord)) {
                 return candidate;
             }
         }
         return null;
     }

     /**
      * Prints the line reporting that an attribute of a word was removed.
      */
     private static void printRemovedAttribute(final String type, final String word,
             final WeightedString attribute) {
         System.out.println(type + " removed: " + word + " " + attribute.mWord + " "
                 + attribute.getProbability());
     }
 }
	/**
	* Copyright (C) 2012 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License"); you may not
	* use this file except in compliance with the License. You may obtain a copy of
	* the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
	* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
	* License for the specific language governing permissions and limitations under
	* the License.
	*/

	package com.android.inputmethod.latin.dicttool;

	import com.android.inputmethod.latin.makedict.FusionDictionary;
	import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
	import com.android.inputmethod.latin.makedict.WeightedString;
	import com.android.inputmethod.latin.makedict.WordProperty;

	import java.util.Arrays;
	import java.util.ArrayList;
	import java.util.HashMap;

	public class Diff extends Dicttool.Command {
	public static final String COMMAND = "diff";

	public Diff() {
	}

	@Override
	public String getHelp() {
	return COMMAND + " [-p] <dict> <dict> : shows differences between two dictionaries.\n"
	+ " If -p (plumbing) option is given, produce output suitable for a script";
	}

	@Override
	public void run() {
	if (mArgs.length < 2) {
	throw new RuntimeException("Not enough arguments for command " + COMMAND);
	}
	final boolean plumbing;
	if ("-p".equals(mArgs[0])) {
	plumbing = true;
	mArgs = Arrays.copyOfRange(mArgs, 1, mArgs.length);
	if (mArgs.length != 2) { // There should be only 2 arguments left
	throw new RuntimeException("Wrong number of arguments for command " + COMMAND);
	}
	} else {
	plumbing = false;
	}
	final FusionDictionary dict0 =
	BinaryDictOffdeviceUtils.getDictionary(mArgs[0], false /* report */);
	if (null == dict0) throw new RuntimeException("Can't read dictionary " + mArgs[0]);
	final FusionDictionary dict1 =
	BinaryDictOffdeviceUtils.getDictionary(mArgs[1], false /* report */);
	if (null == dict1) throw new RuntimeException("Can't read dictionary " + mArgs[1]);
	if (!plumbing) {
	System.out.println("Header :");
	diffHeaders(dict0, dict1);
	if (languageDiffers(dict0, dict1)) {
	// We only check for the language here. The rationale is that one may meaningfully
	// diff a en_US with a en_GB dictionary, but someone who diffs a de dict with a
	// pt_BR dict is almost certainly only interested in header-level diff, and the word
	// diff would be very large, meaningless, and annoying.
	return;
	}
	System.out.println("Body :");
	}
	diffWords(dict0, dict1);
	}

	private static boolean languageDiffers(final FusionDictionary dict0,
	final FusionDictionary dict1) {
	// If either of the dictionaries have no locale, assume it's okay
	if (null == dict0.mOptions.mAttributes.get("locale")) return false;
	if (null == dict1.mOptions.mAttributes.get("locale")) return false;
	final String dict0Lang = dict0.mOptions.mAttributes.get("locale").split("_", 3)[0];
	final String dict1Lang = dict1.mOptions.mAttributes.get("locale").split("_", 3)[0];
	return !dict0Lang.equals(dict1Lang);
	}

	private static void diffHeaders(final FusionDictionary dict0, final FusionDictionary dict1) {
	boolean hasDifferences = false;
	final HashMap<String, String> options1 = new HashMap<>(dict1.mOptions.mAttributes);
	for (final String optionKey : dict0.mOptions.mAttributes.keySet()) {
	if (!dict0.mOptions.mAttributes.get(optionKey).equals(
	dict1.mOptions.mAttributes.get(optionKey))) {
	System.out.println(" " + optionKey + " : "
	+ dict0.mOptions.mAttributes.get(optionKey) + " <=> "
	+ dict1.mOptions.mAttributes.get(optionKey));
	hasDifferences = true;
	}
	options1.remove(optionKey);
	}
	for (final String optionKey : options1.keySet()) {
	System.out.println(" " + optionKey + " : null <=> " + options1.get(optionKey));
	hasDifferences = true;
	}
	if (!hasDifferences) {
	System.out.println(" No differences");
	}
	}

	private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
	boolean hasDifferences = false;
	for (final WordProperty word0Property : dict0) {
	final PtNode word1PtNode = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
	word0Property.mWord);
	if (null == word1PtNode) {
	// This word is not in dict1
	System.out.println("Deleted: " + word0Property.mWord + " "
	+ word0Property.getProbability());
	hasDifferences = true;
	} else {
	// We found the word. Compare frequencies, shortcuts, bigrams
	if (word0Property.getProbability() != word1PtNode.getProbability()) {
	System.out.println("Probability changed: " + word0Property.mWord + " "
	+ word0Property.getProbability() + " -> "
	+ word1PtNode.getProbability());
	hasDifferences = true;
	}
	if (word0Property.mIsNotAWord != word1PtNode.getIsNotAWord()) {
	System.out.println("Not a word: " + word0Property.mWord + " "
	+ word0Property.mIsNotAWord + " -> " + word1PtNode.getIsNotAWord());
	hasDifferences = true;
	}
	if (word0Property.mIsPossiblyOffensive != word1PtNode.getIsPossiblyOffensive()) {
	System.out.println("Possibly-offensive: " + word0Property.mWord + " "
	+ word0Property.mIsPossiblyOffensive + " -> "
	+ word1PtNode.getIsPossiblyOffensive());
	hasDifferences = true;
	}
	hasDifferences \|= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
	"Bigram", word0Property.getBigrams(), word1PtNode.getBigrams());
	}
	}
	for (final WordProperty word1Property : dict1) {
	final PtNode word0PtNode = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
	word1Property.mWord);
	if (null == word0PtNode) {
	// This word is not in dict0
	System.out.println("Added: " + word1Property.mWord + " "
	+ word1Property.getProbability());
	hasDifferences = true;
	}
	}
	if (!hasDifferences) {
	System.out.println(" No differences");
	}
	}

	private static boolean hasAttributesDifferencesAndPrintThemIfAny(final String word,
	final String type, final ArrayList<WeightedString> list0,
	final ArrayList<WeightedString> list1) {
	if (null == list1) {
	if (null == list0) return false;
	for (final WeightedString attribute0 : list0) {
	System.out.println(type + " removed: " + word + " " + attribute0.mWord + " "
	+ attribute0.getProbability());
	}
	return true;
	}
	boolean hasDifferences = false;
	if (null != list0) {
	for (final WeightedString attribute0 : list0) {
	// The following tests with #equals(). The WeightedString#equals() method returns
	// true if both the string and the frequency are the same.
	if (!list1.contains(attribute0)) {
	hasDifferences = true;
	// Search for a word with the same string but a different frequency
	boolean foundString = false;
	for (final WeightedString attribute1 : list1) {
	if (attribute0.mWord.equals(attribute1.mWord)) {
	System.out.println(type + " freq changed: " + word + " "
	+ attribute0.mWord + " " + attribute0.getProbability() + " -> "
	+ attribute1.getProbability());
	list1.remove(attribute1);
	foundString = true;
	break;
	}
	}
	if (!foundString) {
	// We come here if we haven't found any matching string.
	System.out.println(type + " removed: " + word + " " + attribute0.mWord + " "
	+ attribute0.getProbability());
	}
	} else {
	list1.remove(attribute0);
	}
	}
	}
	// We removed any matching word that we found, so now list1 only contains words that
	// are not included in list0.
	for (final WeightedString attribute1 : list1) {
	hasDifferences = true;
	System.out.println(type + " added: " + word + " " + attribute1.mWord + " "
	+ attribute1.getProbability());
	}
	return hasDifferences;
	}
	}