For the rare occasions where you want:

1. multibyte UTF-8 characters

2. linear memory consumption (that is O(n+m) , not O(n*m))

3. to learn the actual string that is the longest common subsequence

4. reasonable (that is O(n*m)) time complexity

Consider this implementation:

<?php

/**
 * UTF-8 aware string helpers, including a longest-common-subsequence (LCS)
 * search that keeps only two rows of the dynamic-programming table in memory
 * at any time instead of the whole table.
 */
class Strings
{
    /**
     * Returns the number of characters (not bytes) in a UTF-8 string.
     *
     * @param string $a UTF-8 encoded string.
     * @return int Character count as reported by mb_strlen().
     */
    public static function len($a)
    {
        return mb_strlen($a, 'UTF-8');
    }

    /**
     * Returns a character-based (not byte-based) slice of a UTF-8 string.
     *
     * @param string   $a UTF-8 encoded string.
     * @param int      $x Character offset at which the slice starts.
     * @param int|null $y Maximum number of characters; null means
     *                    "up to the end of the string".
     * @return string The requested slice.
     */
    public static function substr($a, $x, $y = null)
    {
        if ($y === null) {
            // The full length is always enough to reach the end of the
            // string, whatever the start offset is.
            $y = self::len($a);
        }

        return mb_substr($a, $x, $y, 'UTF-8');
    }

    /**
     * Splits a UTF-8 string into a list of its characters.
     *
     * The string is halved recursively until single characters remain, and
     * the partial lists are then concatenated back together.
     *
     * @param string $a UTF-8 encoded string.
     * @return array List of single-character strings, in original order.
     */
    public static function letters($a)
    {
        $len = self::len($a);

        if ($len == 0) {
            return array();
        }
        if ($len == 1) {
            return array($a);
        }

        $half = $len >> 1;

        // Both operands are plain lists, so array_merge() simply appends
        // the second one to the first and renumbers the keys.
        return array_merge(
            self::letters(self::substr($a, 0, $half)),
            self::letters(self::substr($a, $half))
        );
    }

    /**
     * Computes the last column of the LCS length table for $A versus $B.
     *
     * Element $i of the result is the LCS length of the first $i items of
     * $A and the whole of $B. Only the previous and the current row of the
     * table are kept while it is being filled.
     *
     * @param array $A List of characters (table rows).
     * @param array $B List of characters (table columns).
     * @return array List of count($A) + 1 integers.
     */
    private static function lcs_last_column(array $A, array $B)
    {
        $al = count($A);
        $bl = count($B);

        $last_column = array();
        // Row 0 never reads the previous row; start from a defined value
        // anyway so the loop body has no undefined variable.
        $last_row = array();

        for ($i = 0; $i <= $al; ++$i) {
            $current_row = array();

            for ($j = 0; $j <= $bl; ++$j) {
                if ($i == 0 || $j == 0) {
                    // An empty prefix has nothing in common with anything.
                    $v = 0;
                } elseif ($A[$i - 1] === $B[$j - 1]) {
                    // Matching characters extend the diagonal neighbour.
                    $v = 1 + $last_row[$j - 1];
                } else {
                    // Otherwise inherit the better of "up" and "left".
                    $v = max($last_row[$j], $current_row[$j - 1]);
                }
                $current_row[] = $v;
            }

            $last_column[] = $current_row[$bl];
            $last_row = $current_row;
        }

        return $last_column;
    }

    /**
     * Returns a longest common subsequence of two UTF-8 strings.
     *
     * $b is split in half; for every possible split point of $a the LCS
     * lengths of the left parts and of the right parts are added up, and the
     * split point with the largest sum is used to recurse on both halves.
     * When several split points tie, the smallest one is taken.
     *
     * @param string $a UTF-8 encoded string.
     * @param string $b UTF-8 encoded string.
     * @return string A longest common subsequence ('' if there is none).
     */
    public static function lcs($a, $b)
    {
        $A = self::letters($a);
        $B = self::letters($b);
        $bl = count($B);

        // Nothing can be shared with an empty string; returning here also
        // avoids recursing over $b when $a is already exhausted.
        if ($bl == 0 || count($A) == 0) {
            return '';
        }
        if ($bl == 1) {
            return false === array_search($B[0], $A, true) ? '' : $B[0];
        }

        $half = $bl >> 1;

        // $left[$i]: LCS length of the first $i characters of $a and the
        // left half of $b.
        $left = self::lcs_last_column($A, array_slice($B, 0, $half));

        // $right[$i]: LCS length of $a from character $i onwards and the
        // right half of $b. Obtained by running the same routine on the
        // mirrored inputs and mirroring the resulting column back.
        $right = array_reverse(
            self::lcs_last_column(
                array_reverse($A),
                array_reverse(array_slice($B, $half))
            )
        );

        $best_i = 0;
        $best_lcs = 0;
        foreach ($left as $i => $lcs_left) {
            $option = $lcs_left + $right[$i];
            if ($best_lcs < $option) {
                $best_lcs = $option;
                $best_i = $i;
            }
        }

        return
            self::lcs(self::substr($a, 0, $best_i), self::substr($b, 0, $half)) .
            self::lcs(self::substr($a, $best_i), self::substr($b, $half));
    }
}

?>

This is a classic implementation in which several tricks are used:

1. the strings are exploded into multi-byte characters in O(n lg n) time

2. instead of searching for the longest path in a precomputed two-dimensional array, we search for the best point which lies in the middle column. This is achieved by splitting the second string in half, and recursively calling the algorithm twice. The only things we need from the recursive calls are the values in the middle column. The trick is to return the last column from each recursive call, which is what we need for the left part, but requires one more trick for the right part - we simply mirror the strings and the array so that the last column is the first column. Then we just find the row which maximizes the sum of lengths in each part.

3. one can prove that the time consumed by the algorithm is proportional to the area of the (imaginary) two-dimensional array, thus it is O(n*m).