정렬 거리 매트릭스 포함 소스코드
IT 위키
- 상위 문서: 정렬 거리
소스 코드 (python)[편집 | 원본 편집]
import numpy as np
def alignment_distance(X, Y, delta_eq=-1, delta_neq=1, delta_gap=2):
m, n = len(X), len(Y)
dp = np.zeros((m + 1, n + 1), dtype=int)
traceback = [[None]*(n + 1) for _ in range(m + 1)]
# 초기화
for i in range(1, m + 1):
dp[i][0] = i * delta_gap
traceback[i][0] = (i-1, 0)
for j in range(1, n + 1):
dp[0][j] = j * delta_gap
traceback[0][j] = (0, j-1)
# DP 채우기
for i in range(1, m + 1):
for j in range(1, n + 1):
match_cost = delta_eq if X[i-1] == Y[j-1] else delta_neq
choices = [
(dp[i-1][j-1] + match_cost, (i-1, j-1)), # 대각선
(dp[i-1][j] + delta_gap, (i-1, j)), # 위
(dp[i][j-1] + delta_gap, (i, j-1)) # 왼쪽
]
dp[i][j], traceback[i][j] = min(choices)
return dp, traceback
def print_alignment_path(traceback, X, Y, dp):
i, j = len(X), len(Y)
path = []
while i > 0 or j > 0:
path.append((i, j))
i, j = traceback[i][j]
path.append((0, 0))
path.reverse()
print("\n Alignment path (with character pairs):")
for (i, j) in path:
x_char = X[i-1] if i > 0 else '-'
y_char = Y[j-1] if j > 0 else '-'
print(f"({i},{j}) : {x_char} ↔ {y_char}")
print(f"\nTotal alignment cost: {dp[len(X)][len(Y)]}")
# 예제 실행
X = "final"
Y = "infill"
dp, traceback = alignment_distance(X, Y)
print(" Alignment cost matrix:")
print(dp)
print_alignment_path(traceback, X, Y, dp)
출력 예시[편집 | 원본 편집]
Alignment cost matrix: [[ 0 2 4 6 8 10 12] [ 2 1 3 3 5 7 9] [ 4 1 2 4 2 4 6] [ 6 3 0 2 4 3 5] [ 8 5 2 1 3 5 4] [10 7 4 3 2 2 4]] Alignment path (with character pairs): (0,0) : - ↔ - (0,1) : - ↔ i (0,2) : - ↔ n (1,3) : f ↔ f (2,4) : i ↔ i (3,4) : n ↔ i (4,5) : a ↔ l (5,6) : l ↔ l Total alignment cost: 4