정렬 거리 매트릭스 포함 소스코드

IT 위키

소스 코드 (python)[편집 | 원본 편집]

import numpy as np

def alignment_distance(X, Y, delta_eq=-1, delta_neq=1, delta_gap=2):
    m, n = len(X), len(Y)
    dp = np.zeros((m + 1, n + 1), dtype=int)
    traceback = [[None]*(n + 1) for _ in range(m + 1)]

    # 초기화
    for i in range(1, m + 1):
        dp[i][0] = i * delta_gap
        traceback[i][0] = (i-1, 0)
    for j in range(1, n + 1):
        dp[0][j] = j * delta_gap
        traceback[0][j] = (0, j-1)

    # DP 채우기
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            match_cost = delta_eq if X[i-1] == Y[j-1] else delta_neq
            choices = [
                (dp[i-1][j-1] + match_cost, (i-1, j-1)),  # 대각선
                (dp[i-1][j] + delta_gap, (i-1, j)),       # 위
                (dp[i][j-1] + delta_gap, (i, j-1))        # 왼쪽
            ]
            dp[i][j], traceback[i][j] = min(choices)

    return dp, traceback

def print_alignment_path(traceback, X, Y, dp):
    i, j = len(X), len(Y)
    path = []
    while i > 0 or j > 0:
        path.append((i, j))
        i, j = traceback[i][j]
    path.append((0, 0))
    path.reverse()

    print("\n Alignment path (with character pairs):")
    for (i, j) in path:
        x_char = X[i-1] if i > 0 else '-'
        y_char = Y[j-1] if j > 0 else '-'
        print(f"({i},{j}) : {x_char}{y_char}")

    print(f"\nTotal alignment cost: {dp[len(X)][len(Y)]}")

# 예제 실행
X = "final"
Y = "infill"

dp, traceback = alignment_distance(X, Y)
print(" Alignment cost matrix:")
print(dp)

print_alignment_path(traceback, X, Y, dp)

출력 예시[편집 | 원본 편집]

Alignment cost matrix:
[[ 0  2  4  6  8 10 12]
 [ 2  1  3  3  5  7  9]
 [ 4  1  2  4  2  4  6]
 [ 6  3  0  2  4  3  5]
 [ 8  5  2  1  3  5  4]
 [10  7  4  3  2  2  4]]

Alignment path (with character pairs):
(0,0) : - ↔ -
(0,1) : - ↔ i
(0,2) : - ↔ n
(1,3) : f ↔ f
(2,4) : i ↔ i
(3,4) : n ↔ i
(4,5) : a ↔ l
(5,6) : l ↔ l

Total alignment cost: 4