TreeMappingAlgorithm.cs
上传用户:hbhltzc
上传日期:2022-06-04
资源大小:1925k
文件大小:16k
源码类别:

xml/soap/webservice

开发平台:

Visual C++

  1. //------------------------------------------------------------------------------
  2. // <copyright file="TreeMappingAlgorithm.cs" company="Microsoft">
  3. //     Copyright (c) Microsoft Corporation.  All rights reserved.
  4. // </copyright>                                                                
  5. //------------------------------------------------------------------------------
  6. using System;
  7. using System.Diagnostics;
  8. namespace Microsoft.XmlDiffPatch
  9. {
  10. //////////////////////////////////////////////////////////////////
  11. // MinimalTreeDistanceAlgo
  12. //
  13. internal class MinimalTreeDistanceAlgo
  14. {
  15.     //////////////////////////////////////////////////////////////////
  16.     // MinimalTreeDistanceAlgo.Distance
  17.     //
  18.     private struct Distance
  19.     {
  20.         internal int _cost;
  21.         internal EditScript _editScript;
  22.     }
  23. // Fields
  24.     // XmlDiff
  25.     XmlDiff _xmlDiff;
  26.     // nodes in the post-order numbering - cached from XmlDiff 
  27.     XmlDiffNode[] _sourceNodes;
  28.     XmlDiffNode[] _targetNodes;
  29.     // distances between all possible pairs of subtrees
  30.     Distance[,] _treeDist;
  31.     // distances between all possible pairs of forests - used by ComputeTreeDistance
  32.     // method. It is hold here and allocated just once in the FindMinimalDistance method
  33.     // instead of allocating it as a local variable in the ComputeTreeDistance method.
  34.     Distance[,] _forestDist;
  35. // Static fields
  36.     static readonly EditScriptEmpty EmptyEditScript = new EditScriptEmpty();
  37.     // Operation costs
  38.     internal static readonly int[] OperationCost = {
  39.             0,              // Match                      = 0,
  40.             4,              // Add                        = 1,
  41.             4,              // Remove                     = 2,
  42.             1,              // ChangeElementName          = 3,
  43.             1,              // ChangeElementAttr1         = 4,
  44.             2,              // ChangeElementAttr2         = 5,
  45.         3,              // ChangeElementAttr3         = 6,
  46.         2,              // ChangeElementNameAndAttr1  = 7,
  47.         3,              // ChangeElementNameAndAttr2  = 8,
  48.         4,              // ChangeElementNameAndAttr3  = 9,
  49.             4,              // ChangePI                   = 10,
  50.             4,              // ChangeER                   = 11,
  51.             4,              // ChangeCharacterData        = 12,
  52.             4,              // ChangeXmlDeclaration       = 13,
  53.             4,              // ChangeDTD                  = 14,
  54.             int.MaxValue/2, // Undefined                  = 15,
  55.     };
  56. // Constructor
  57.     internal MinimalTreeDistanceAlgo( XmlDiff xmlDiff ) 
  58.     {
  59.         Debug.Assert( OperationCost.Length - 1 == (int)XmlDiffOperation.Undefined,
  60.                       "Correct the OperationCost array so that it reflects the XmlDiffOperation enum." );
  61.         Debug.Assert( xmlDiff != null );
  62.         _xmlDiff = xmlDiff;
  63.     }
  64. // Methods
  65.     internal EditScript FindMinimalDistance()
  66.     {
  67.         EditScript resultEditScript = null;
  68.         try
  69.         {
  70.             // cache sourceNodes and targetNodes arrays
  71.             _sourceNodes = _xmlDiff._sourceNodes;
  72.             _targetNodes = _xmlDiff._targetNodes;
  73.             // create the _treeDist array - it contains distances between subtrees.
  74.             // The zero-indexed row and column are not used.
  75.             // This is to have the consistent indexing of all arrays in the algorithm;
  76.             // _forestDist array requires 0-indexed border fields for recording the distance 
  77.             // of empty forest.
  78.             _treeDist = new Distance[ _sourceNodes.Length, _targetNodes.Length ];
  79.             // create _forestDist array;
  80.             // Parts of this array are independently used in subsequent calls of ComputeTreeDistance.
  81.             // The array is allocated just once here in the biggest bounds it will ever need
  82.             // instead of allocating it in each call of ComputeTreeDistance as a local variable.
  83.             _forestDist = new Distance[ _sourceNodes.Length, _targetNodes.Length ];
  84.             // the algorithm; computes the _treeDist array
  85.             int i, j;
  86.             for ( i = 1; i < _sourceNodes.Length; i++ )
  87.             {
  88.                 if ( _sourceNodes[i].IsKeyRoot )
  89.                 {
  90.                     for ( j = 1; j < _targetNodes.Length; j++ )
  91.                     {
  92.                         if ( _targetNodes[j].IsKeyRoot )
  93.                         {
  94.                             ComputeTreeDistance( i, j );
  95.                         }
  96.                     }
  97.                 }
  98.             }
  99.             // get the result edit script
  100.             resultEditScript = _treeDist[ _sourceNodes.Length-1, _targetNodes.Length-1 ]._editScript;
  101.         }
  102.         finally
  103.         {
  104.             _forestDist = null; 
  105.             _treeDist = null;
  106.             _sourceNodes = null;
  107.             _targetNodes = null;
  108.         }
  109.         // normalize the found edit script (expands script references etc.)
  110.         return NormalizeScript( resultEditScript );
  111.     }
  112.     private void ComputeTreeDistance( int sourcePos, int targetPos ) 
  113.     {
  114.         int sourcePosLeft = _sourceNodes[ sourcePos ].Left;
  115.         int targetPosLeft = _targetNodes[ targetPos ].Left;
  116.         int i, j;
  117.         // init borders of _forestDist array
  118.         EditScriptAddOpened esAdd = new EditScriptAddOpened( targetPosLeft, EmptyEditScript );
  119.         EditScriptRemoveOpened esRemove = new EditScriptRemoveOpened( sourcePosLeft, EmptyEditScript );
  120.         _forestDist[ sourcePosLeft-1, targetPosLeft-1 ]._cost = 0;
  121.         _forestDist[ sourcePosLeft-1, targetPosLeft-1 ]._editScript = EmptyEditScript;
  122.         for ( i = sourcePosLeft; i <= sourcePos; i++ ) 
  123.         {
  124.             _forestDist[ i, targetPosLeft-1 ]._cost = ( i - sourcePosLeft + 1 ) * OperationCost[ (int)XmlDiffOperation.Remove ];
  125.             _forestDist[ i, targetPosLeft-1 ]._editScript = esRemove;
  126.         }
  127.         for ( j = targetPosLeft; j <= targetPos; j++ )
  128.         {
  129.             _forestDist[ sourcePosLeft-1, j ]._cost = ( j - targetPosLeft + 1 ) * OperationCost[ (int)XmlDiffOperation.Add ];
  130.             _forestDist[ sourcePosLeft-1, j ]._editScript = esAdd;
  131.         }
  132. #if DEBUG
  133.         Trace.WriteIf( XmlDiff.T_ForestDistance.Enabled, "nForest distance (" + sourcePos + "," + targetPos + "):n" );
  134.         Trace.WriteIf( XmlDiff.T_ForestDistance.Enabled, "       0    " );
  135.         for ( j = targetPosLeft; j <= targetPos; j++ )
  136.             Trace.WriteIf( XmlDiff.T_ForestDistance.Enabled, j + "    " );
  137.         Trace.WriteIf( XmlDiff.T_ForestDistance.Enabled, "n   " );
  138.         for ( j = targetPosLeft-1; j <= targetPos; j++ )
  139.             Trace.WriteIf( XmlDiff.T_ForestDistance.Enabled, "*****" );
  140.         Trace.WriteIf( XmlDiff.T_ForestDistance.Enabled, "n0  *   0   " );
  141.         for ( j = targetPosLeft; j <= targetPos; j++ )
  142.             Trace.WriteIf( XmlDiff.T_ForestDistance.Enabled, "-" + _forestDist[ sourcePosLeft-1, j ]._cost + "   " );
  143.         Trace.WriteIf( XmlDiff.T_ForestDistance.Enabled, "n" );
  144. #endif
  145.         // compute the inside of _forestDist array
  146.         for ( i = sourcePosLeft; i <= sourcePos; i++ )
  147.         {
  148. #if DEBUG
  149.             Trace.WriteIf( XmlDiff.T_ForestDistance.Enabled, i + "  *  |" + _forestDist[ i, targetPosLeft-1 ]._cost + "   " );
  150. #endif
  151.             for ( j = targetPosLeft; j <= targetPos; j++ )
  152.             {
  153.                 int sourceCurLeft = _sourceNodes[i].Left;
  154.                 int targetCurLeft = _targetNodes[j].Left;
  155.  
  156.                 int removeCost = _forestDist[ i-1, j ]._cost + OperationCost[ (int)XmlDiffOperation.Remove ]; 
  157.                 int addCost  = _forestDist[ i, j-1 ]._cost + OperationCost[ (int)XmlDiffOperation.Add ];
  158.                 if ( sourceCurLeft == sourcePosLeft  &&  targetCurLeft == targetPosLeft )
  159.                 {
  160.                     XmlDiffOperation changeOp = _sourceNodes[i].GetDiffOperation( _targetNodes[j], _xmlDiff );
  161.                     
  162.                     Debug.Assert( XmlDiff.IsChangeOperation( changeOp ) || 
  163.                                   changeOp == XmlDiffOperation.Match ||
  164.                                   changeOp == XmlDiffOperation.Undefined );
  165.                     if ( changeOp == XmlDiffOperation.Match ) 
  166.                     {
  167.                         // identical nodes matched
  168.                         OpNodesMatch( i, j );
  169.                     }
  170.                     else
  171.                     {
  172.                         int changeCost = _forestDist[ i-1, j-1 ]._cost + OperationCost[ (int)changeOp ];
  173.                         if ( changeCost < addCost )
  174.                         {
  175.                             // operation 'change'
  176.                             if ( changeCost < removeCost )
  177.                                 OpChange( i, j, changeOp, changeCost );
  178.                             // operation 'remove'
  179.                             else
  180.                                 OpRemove( i, j, removeCost );
  181.                         }
  182.                         else
  183.                         {
  184.                             // operation 'add'
  185.                             if ( addCost < removeCost )
  186.                                 OpAdd( i, j, addCost );
  187.                             // operation 'remove'
  188.                             else 
  189.                                 OpRemove( i, j, removeCost );
  190.                         }
  191.                     }
  192.                         
  193.                     _treeDist[ i, j ]._cost = _forestDist[ i, j ]._cost;
  194.                     _treeDist[ i, j ]._editScript = _forestDist[ i, j ]._editScript.GetClosedScript( i, j );;
  195.                 }
  196.                 else
  197.                 {
  198.                     int m = sourceCurLeft - 1;
  199.                     int n = targetCurLeft - 1;
  200.                     if ( m < sourcePosLeft - 1 ) m = sourcePosLeft - 1;
  201.                     if ( n < targetPosLeft - 1 ) n = targetPosLeft - 1;
  202.                     // cost of concatenating of the two edit scripts
  203.                     int compoundEditCost = _forestDist[ m, n ]._cost + _treeDist[ i, j ]._cost;
  204.                     if ( compoundEditCost < addCost )
  205.                     {
  206.                         if ( compoundEditCost < removeCost )
  207.                         {
  208.                             // copy script
  209.                             if ( _treeDist[ i, j ]._editScript == EmptyEditScript ) 
  210.                             {
  211.                                 Debug.Assert( _treeDist[ i, j ]._cost == 0 );
  212.                                 OpCopyScript( i, j, m, n );
  213.                             }
  214.                             // concatenate scripts
  215.                             else
  216.                                 OpConcatScripts( i, j, m, n );
  217.                         }
  218.                         // operation 'remove'
  219.                         else
  220.                             OpRemove( i, j, removeCost );
  221.                     }
  222.                     else 
  223.                     {
  224.                         // operation 'add'
  225.                         if ( addCost < removeCost) 
  226.                             OpAdd( i, j, addCost );
  227.                         // operation 'remove'
  228.                         else 
  229.                             OpRemove( i, j, removeCost );
  230.                     }
  231.                 }
  232. #if DEBUG
  233.                 Trace.WriteIf( XmlDiff.T_ForestDistance.Enabled, _forestDist[ i, j ]._cost + "   " );
  234. #endif
  235.             }
  236. #if DEBUG
  237.             Trace.WriteIf( XmlDiff.T_ForestDistance.Enabled, "n" );
  238. #endif            
  239.         }
  240.     }
  241.     private void OpChange( int i, int j, XmlDiffOperation changeOp, int cost )
  242.     {
  243. #if DEBUG
  244.         Trace.WriteIf( XmlDiff.T_ForestDistance.Enabled, "*" );
  245. #endif
  246.         _forestDist[ i, j ]._editScript = new EditScriptChange( i, j, changeOp, _forestDist[ i-1, j-1 ]._editScript.GetClosedScript( i-1, j-1 ) );
  247.         _forestDist[ i, j ]._cost = cost;
  248.     }
  249.     private void OpAdd( int i, int j, int cost )
  250.     {
  251. #if DEBUG
  252.         Trace.WriteIf( XmlDiff.T_ForestDistance.Enabled, "-" );
  253. #endif
  254.         EditScriptAddOpened openedAdd = _forestDist[ i, j-1 ]._editScript as EditScriptAddOpened;
  255.         if ( openedAdd == null )
  256.             openedAdd = new EditScriptAddOpened( j, _forestDist[ i, j-1 ]._editScript.GetClosedScript( i, j-1 ) );
  257.         _forestDist[ i, j ]._editScript = openedAdd;
  258.         _forestDist[ i, j ]._cost = cost;
  259.     }
  260.     private void OpRemove( int i, int j, int cost )
  261.     {
  262. #if DEBUG
  263.         Trace.WriteIf( XmlDiff.T_ForestDistance.Enabled, "|" );
  264. #endif
  265.         EditScriptRemoveOpened openedRemove = _forestDist[ i-1, j ]._editScript as EditScriptRemoveOpened;
  266.         if ( openedRemove == null )
  267.             openedRemove  = new EditScriptRemoveOpened( i, _forestDist[ i-1, j ]._editScript.GetClosedScript( i-1, j ) );
  268.         _forestDist[ i, j ]._editScript = openedRemove;
  269.         _forestDist[ i, j ]._cost = cost;
  270.     }
  271.     private void OpNodesMatch( int i, int j )
  272.     {
  273. #if DEBUG
  274.         Trace.WriteIf( XmlDiff.T_ForestDistance.Enabled, "\" );
  275. #endif
  276.         EditScriptMatchOpened openedMatch = _forestDist[ i-1, j-1 ]._editScript as EditScriptMatchOpened;
  277.         if ( openedMatch == null )
  278.             openedMatch  = new EditScriptMatchOpened( i, j, _forestDist[ i-1, j-1 ]._editScript.GetClosedScript( i-1, j-1 ) );
  279.         _forestDist[ i, j ]._editScript = openedMatch;
  280.         _forestDist[ i, j ]._cost = _forestDist[ i-1, j-1 ]._cost;
  281.     }
  282.     private void OpCopyScript( int i, int j,
  283.                                int m, int n )
  284.     {
  285. #if DEBUG
  286.         Trace.WriteIf( XmlDiff.T_ForestDistance.Enabled, "&" );
  287. #endif
  288.         _forestDist[ i, j ]._cost = _forestDist[ m, n ]._cost;
  289.         _forestDist[ i, j ]._editScript = _forestDist[ m, n ]._editScript.GetClosedScript( m, n );
  290.     }
  291.     private void OpConcatScripts( int i, int j, int m, int n )
  292.     {
  293. #if DEBUG
  294.         Trace.WriteIf( XmlDiff.T_ForestDistance.Enabled, "&" );
  295. #endif
  296.         _forestDist[ i, j ]._editScript = new EditScriptReference( _treeDist[ i, j ]._editScript, _forestDist[ m, n ]._editScript.GetClosedScript( m, n ));
  297.         _forestDist[ i, j ]._cost = _treeDist[ i, j ]._cost + _forestDist[ m, n ]._cost;
  298.     }
  299. // Static methods
  300.     // This method expands 'reference edit script' items and removes the last item 
  301.     // (which is the static instance of EmptyEditScript).
  302.     static private EditScript NormalizeScript( EditScript es )
  303.     {
  304.         EditScript returnES = es;
  305.         EditScript curES = es;
  306.         EditScript prevES = null;
  307.         while ( curES != EmptyEditScript )
  308.         {
  309.             Debug.Assert( curES != null );
  310.             if ( curES.Operation != EditScriptOperation.EditScriptReference )
  311.             {
  312.                 prevES = curES;
  313.                 curES = curES._nextEditScript;
  314.             }
  315.             else
  316.             {
  317.                 EditScriptReference refES = curES as EditScriptReference;
  318.                 
  319.                 EditScript lastES = refES._editScriptReference;
  320.                 Debug.Assert( lastES != EmptyEditScript  && lastES != null );
  321.                 while ( lastES.Next != EmptyEditScript )
  322.                 {
  323.                     lastES = lastES._nextEditScript;
  324.                     Debug.Assert( lastES != null );
  325.                 }
  326.                 lastES._nextEditScript = curES._nextEditScript;
  327.                 curES = refES._editScriptReference;
  328.                 if ( prevES == null ) 
  329.                     returnES = curES;
  330.                 else
  331.                     prevES._nextEditScript = curES;
  332.             }
  333.         }
  334.         if ( prevES != null ) 
  335.             prevES._nextEditScript = null;
  336.         else
  337.             returnES = null;
  338.         return returnES;
  339.     }
  340. }
  341. }