ch07
그래프와 그래프 이론
Node: 각 그래프의 점( 네트워크에서는 node(마디), 그래프에서는 vertex(꼭지점))
Edge: 각 점을 잇는 선( 네트워크에서는 link(가지), 그래프에서는 edge(마디))
Degree: 각 노드에서 발생된 엣지의 수
네트워크: 그래프
ch08
Maximum flow
problem
출발지와 목적지가 각각 하나인 네트워크로 돌아오고 나가는
최대 유통량이 얼마인가를 계산
흐르는 량을 최대로 하기 위해서는 각 가지를 통해 흐르는
용량은 얼마이어야 하는지 결정한다.
출발노드와 도착노드를 지정하고 실행하면 결과 값을 얻을
수 있다.
반드시 출발노드인 유입량과 도착노드인 유출량은 같아야 한다.
maximum flow
problem을 해결하기 위해서는 먼저 라이브러리를 받아야 한다.
pip install
ortools
from ortools.graph import pywrapgraph
start_nodes = [0, 0,
0, 1, 1, 2, 2, 3, 3] //출발 노드
end_nodes = [1, 2,
3, 2, 4, 3, 4, 2, 4] //끝 노드
capacities = [20,
30, 10, 40, 30, 10, 20, 5, 20] //최대 흐르는 양
max_flow =
pywrapgraph.SimpleMaxFlow()
for i in range(0,
len(start_nodes)):
max_flow.AddArcWithCapacity(start_nodes[i],
end_nodes[i], capacities[i])
# Find the maximum
flow between node 0 and node 4.
if max_flow.Solve(0,
4) == max_flow.OPTIMAL: //출발 노드는 0, 도착 노드는 4가 된다.
print('Max flow:', max_flow.OptimalFlow())
print('')
print('
Arc Flow / Capacity')
for i in range(max_flow.NumArcs()):
print('%1s -> %1s %3s /
%3s' % (
max_flow.Tail(i),
max_flow.Head(i),
max_flow.Flow(i),
max_flow.Capacity(i)))
else:
print('There was an issue with the max flow
input.')
<결과값>
Arc Flow / Capacity
0 -> 1 20
/ 20
0 -> 2 30
/ 30
0 -> 3 10
/ 10
1 -> 2 0
/ 40
1 -> 4 20
/ 30
2 -> 3 10
/ 10
2 -> 4 20
/ 20
3 -> 2 0
/ 5
3 -> 4 20
/ 20
시작점이 0이고
끝점이 4라면 0에서 60,
끝인 4에서 60이라는 점을 확인할 수 있다.
ch09
Minimum spanning
tree (최소 걸침나무 문제)
네트워크의 모든 마디를 서로 연결할 때 가지들의 총 길이가
최소가 되도록 하는 문제
별도 모듈 필요 없음.
sys
prim's algorithm
import sys
class Graph():
def __init__(self, vertices):
self.V = vertices
self.graph = [[0 for
column in range(vertices)]
for
row in range(vertices)]
def printMST(self, parent):
print("Edge
\tWeight")
for i in range(1,
self.V):
print(parent[i],
"-", i, "\t", self.graph[i][ parent[i] ])
def minKey(self, key, mstSet):
min = sys.maxsize
for v in
range(self.V):
if
key[v] < min and mstSet[v] == False:
min
= key[v]
min_index
= v
return min_index
def primMST(self):
key = [sys.maxsize] *
self.V
parent = [None] *
self.V
key[0] = 0
mstSet = [False] *
self.V
parent[0] = -1
for cout in
range(self.V):
u =
self.minKey(key, mstSet)
mstSet[u]
= True
for v in
range(self.V):
if
self.graph[u][v] > 0 and mstSet[v] == False and key[v] >
self.graph[u][v]:
key[v]
= self.graph[u][v]
parent[v]
= u
self.printMST(parent)
g = Graph(7) //7개의 노드 부분
# 행렬로 표시 : 직접적으로 연결되어 있지 않은 점과 점의
거리는 0 또는 아주 큰 수(M)으로 설정한 다. 가령 100이나 1000 또는
10000 등을 넣어준다.
# 0
1 2 3 4 5 6
g.graph = [[ 0, 50, 60, 80, 0,
0, 0], # 0
[50, 0, 0,
40, 70, 0, 0], #
1
[60, 0, 0,
55, 0, 95, 0], #
2
[80, 40, 55, 0, 45, 75, 93], # 3
[ 0, 70, 0, 45,
0, 0, 57], # 4
[ 0, 0, 95, 75,
0, 0, 67], # 5
[ 0, 0, 0,
93, 57, 67, 0]] # 6
g.primMST();
<결과값>
Edge Weight
0 - 1 50
3 - 2 55
1 - 3 40
3 - 4 45
6 - 5 67
4 - 6 57
ch10
shortest path
problem
: 가장 짧은 거리로 목적지를 가는 방법 ( TSP와는 다르다. TSP는 모든 점을 찍고 빠르게 돌아오는 것이다. )
shortest path
problem을 해결하기 위해서는 모듈을 받아와야 한다.
pip install
Dijkstar
import dijkstar
graph =
dijkstar.Graph()
graph.add_edge(1,
2, {'cost': 1}) //노드 1에서
2노드로 갈 때 드는 비용은 1
graph.add_edge(2,
3, {'cost': 2}) //노드 2에서
3노드로 갈 때 드는 비용은 2
graph.add_edge(1,
3, {'cost': 4}) //노드 1에서
3노드로 갈 때 드는 비용은 4
cost_func = lambda
u, v, e, prev_e: e['cost']
print(dijkstar.find_path(graph,
1, 3, cost_func=cost_func)) //1에서 3으로
<결과값>
PathInfo(nodes=[1,
2, 3], edges=[{'cost': 1}, {'cost': 2}], costs=[1, 2], total_cost=3) //지나야 하는 노드는 1=>2=>3, 비용이 1인 경로, 2인 경로를 지나 도착한다. 총 비용은 3이다.
여러개의 솔루션 중 하나를 선택하여 알려준다.
ch11
travelling
salesman problem (TSP)
모든 점을 돌아서 최단 거리로 다시 돌아오는 방법을 구하는
알고리즘
traveling salesman
problem 을 해결하기 위해서는 별도의 모듈을 받아와야 한다.
pip install
tsp_solver2
import tsp_solver.greedy
# Prepare the
square symmetric distance matrix for 3 nodes:
# Distance from A to B is 1.0
# B to C is 3.0
# A to C is 2.0
// 행렬의 반만 넣는다.
D = [[],
[1.0],
[2.0, 3.0]]
path = tsp_solver.greedy.solve_tsp(D)
// 행렬 삽입 후 실행
# path =
tsp_solver.greedy.solve_tsp(D, endpoints=(0, 2)) // 최대한 쓰지 않는다. 더 좋지 않은 솔루션을 도출 할 수
있기 때문이다. endpoints=(0,2) 는 0에서 출발, 2에서 마무리한다는 의미이다.
# will print
[1,0,2], path with total length of 3.0 units
print(path)
<결과값>
[1, 0, 2]
노드 1과 0을 지나 2에 도착하게 된다.
import
tsp_solver.greedy
D = [[],
[1.0],
[10000.0,3.0],
[1.0,2.0,3.0],
[4.0,10000.0,2.0,4.0]]
path =
tsp_solver.greedy.solve_tsp(D)
# path =
tsp_solver.greedy.solve_tsp(D, endpoints=(0, 2))
print(path)
<결과값>
[3, 0, 1, 2, 4]
3 => 0 => 1 => 2 => 4 그리고 4에서 3으로
가면 다시 돌아올 수 있게 된다.
ch12
Simulated-annealing
모의로 시뮬레이팅
하여 최적의 값에 근사하게 찾아가는 모델
1-1) Simulated Annealing for K-NN with IRIS DATA
import sklearn.model_selection
import sklearn.datasets
import sklearn.neighbors
import simulated_annealing.optimize
import numpy
iris = sklearn.datasets.load_iris()
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(iris['data'], iris['target'], train_size=0.75, test_size=0.25, random_state=0)
clf = sklearn.neighbors.KNeighborsClassifier(n_neighbors=3) //n_neighbors 의 값은 아무거나 들어가도 상관없다.
clf_params = {'n_neighbors': numpy.arange(1, 20)} // arange(1, 20) 은 1에서 20까지의 수 중에서 찾아 낼 것이라는 의미
sa = simulated_annealing.optimize.SimulatedAnneal(clf, clf_params, T=10.0, T_min=0.001, alpha=0.75, verbose=True, max_iter=0.25, n_trans=5, max_runtime=300, cv=3, scoring='f1_macro', refit=True)
sa.fit(X_train, y_train) //sa를 fit한다.
y_train_pred = sa.best_estimator_.predict(X_train)
y_test_pred = sa.best_estimator_.predict(X_test)
print("Best score & params: {:.4f} {}".format(sa.best_score_, sa.best_params_))
print("Accuracy train: {:.4f}".format(numpy.mean(y_train_pred == y_train)))
print("Accuracy test: {:.4f}".format(numpy.mean(y_test_pred == y_test)))
import sklearn.datasets
import sklearn.neighbors
import simulated_annealing.optimize
import numpy
iris = sklearn.datasets.load_iris()
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(iris['data'], iris['target'], train_size=0.75, test_size=0.25, random_state=0)
clf = sklearn.neighbors.KNeighborsClassifier(n_neighbors=3) //n_neighbors 의 값은 아무거나 들어가도 상관없다.
clf_params = {'n_neighbors': numpy.arange(1, 20)} // arange(1, 20) 은 1에서 20까지의 수 중에서 찾아 낼 것이라는 의미
sa = simulated_annealing.optimize.SimulatedAnneal(clf, clf_params, T=10.0, T_min=0.001, alpha=0.75, verbose=True, max_iter=0.25, n_trans=5, max_runtime=300, cv=3, scoring='f1_macro', refit=True)
sa.fit(X_train, y_train) //sa를 fit한다.
y_train_pred = sa.best_estimator_.predict(X_train)
y_test_pred = sa.best_estimator_.predict(X_test)
print("Best score & params: {:.4f} {}".format(sa.best_score_, sa.best_params_))
print("Accuracy train: {:.4f}".format(numpy.mean(y_train_pred == y_train)))
print("Accuracy test: {:.4f}".format(numpy.mean(y_test_pred == y_test)))
<결과값>
INFO: Number of possible iterations given cooling schedule: 160
2 T: 10.00000, score: 0.950173, std: 0.035547, params: {'n_neighbors':
13}
(중략)
40 T: 1.33484, score: 0.965499, std: 0.033863, params: {'n_neighbors':
9}
41 T: 1.33484, score: 0.943136, std: 0.047897, params: {'n_neighbors':
1}
Best score & params: 0.9668 {'n_neighbors': 8} //최상의 n은
8이다.
Accuracy train: 0.9732
Accuracy test: 0.9737 //test 데이터에 대한 accuracy는 0.9737
1-2) Simulated Annealing for k-NN about Cancer data
import sklearn.model_selection
import sklearn.datasets
import sklearn.neighbors
import simulated_annealing.optimize
import numpy
cancer = sklearn.datasets.load_breast_cancer()
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(cancer.data, cancer.target, train_size=0.75, test_size=0.25, random_state=0)
clf = sklearn.neighbors.KNeighborsClassifier(n_neighbors=3)
clf_params = {'n_neighbors': numpy.arange(1, 50)}
sa = simulated_annealing.optimize.SimulatedAnneal(clf, clf_params, T=10.0, T_min=0.001, alpha=0.75, verbose=True, max_iter=0.25, n_trans=5, max_runtime=300, cv=3, scoring='f1_macro', refit=True)
sa.fit(X_train, y_train)
y_train_pred = sa.best_estimator_.predict(X_train)
y_test_pred = sa.best_estimator_.predict(X_test)
print("Best score & params: {:.4f} {}".format(sa.best_score_, sa.best_params_))
print("Accuracy train: {:.4f}".format(numpy.mean(y_train_pred == y_train)))
print("Accuracy test: {:.4f}".format(numpy.mean(y_test_pred == y_test)))
import sklearn.datasets
import sklearn.neighbors
import simulated_annealing.optimize
import numpy
cancer = sklearn.datasets.load_breast_cancer()
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(cancer.data, cancer.target, train_size=0.75, test_size=0.25, random_state=0)
clf = sklearn.neighbors.KNeighborsClassifier(n_neighbors=3)
clf_params = {'n_neighbors': numpy.arange(1, 50)}
sa = simulated_annealing.optimize.SimulatedAnneal(clf, clf_params, T=10.0, T_min=0.001, alpha=0.75, verbose=True, max_iter=0.25, n_trans=5, max_runtime=300, cv=3, scoring='f1_macro', refit=True)
sa.fit(X_train, y_train)
y_train_pred = sa.best_estimator_.predict(X_train)
y_test_pred = sa.best_estimator_.predict(X_test)
print("Best score & params: {:.4f} {}".format(sa.best_score_, sa.best_params_))
print("Accuracy train: {:.4f}".format(numpy.mean(y_train_pred == y_train)))
print("Accuracy test: {:.4f}".format(numpy.mean(y_test_pred == y_test)))
<결과값>
Best score & params: 0.9221 {'n_neighbors': 6}
Accuracy train: 0.9413
Accuracy test: 0.9231
//결과값은 항상
바뀔 수 있다. 통계 기반으로 진행되기 때문이다.
2-1) Simluated Annealing for Logistic Regression
import sklearn.model_selection
import sklearn.datasets
import sklearn.linear_model
import simulated_annealing.optimize
import numpy
iris = sklearn.datasets.load_iris()
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(iris['data'], iris['target'], train_size=0.75, test_size=0.25, random_state=0)
clf = sklearn.linear_model.LogisticRegression()
clf_params = {'C': numpy.logspace(-8, 10, 19, base=2), 'fit_intercept': [1, 0]} //-8 부터 10까지 19개의 절편을 확인해 볼 것이라는 의미, fit.intercept 는 절편이 있거나 또는 없는 경우를 확인한다.
sa = simulated_annealing.optimize.SimulatedAnneal(clf, clf_params, T=10.0, T_min=0.001, alpha=0.75, verbose=True, max_iter=0.25, n_trans=5, max_runtime=300, cv=3, scoring='f1_macro', refit=True)
sa.fit(X_train, y_train)
y_train_pred = sa.best_estimator_.predict(X_train)
y_test_pred = sa.best_estimator_.predict(X_test)
print("Best score & params: {:.4f} {}".format(sa.best_score_, sa.best_params_))
print("Accuracy train: {:.4f}".format(numpy.mean(y_train_pred == y_train)))
print("Accuracy test: {:.4f}".format(numpy.mean(y_test_pred == y_test)))
import sklearn.datasets
import sklearn.linear_model
import simulated_annealing.optimize
import numpy
iris = sklearn.datasets.load_iris()
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(iris['data'], iris['target'], train_size=0.75, test_size=0.25, random_state=0)
clf = sklearn.linear_model.LogisticRegression()
clf_params = {'C': numpy.logspace(-8, 10, 19, base=2), 'fit_intercept': [1, 0]} //-8 부터 10까지 19개의 절편을 확인해 볼 것이라는 의미, fit.intercept 는 절편이 있거나 또는 없는 경우를 확인한다.
sa = simulated_annealing.optimize.SimulatedAnneal(clf, clf_params, T=10.0, T_min=0.001, alpha=0.75, verbose=True, max_iter=0.25, n_trans=5, max_runtime=300, cv=3, scoring='f1_macro', refit=True)
sa.fit(X_train, y_train)
y_train_pred = sa.best_estimator_.predict(X_train)
y_test_pred = sa.best_estimator_.predict(X_test)
print("Best score & params: {:.4f} {}".format(sa.best_score_, sa.best_params_))
print("Accuracy train: {:.4f}".format(numpy.mean(y_train_pred == y_train)))
print("Accuracy test: {:.4f}".format(numpy.mean(y_test_pred == y_test)))
<결과값>
(생략)
41 T: 1.33484, score: 0.645052, std: 0.127323, params: {'C': 0.125,
'fit_intercept': 0}
Best score & params: 0.9822 {'C': 128.0, 'fit_intercept': 0}
Accuracy train: 0.9821
Accuracy test: 0.9211
2-2) Simulated Annealing for LS about cancer data
import sklearn.model_selection
import sklearn.datasets
import sklearn.linear_model
import simulated_annealing.optimize
import numpy
cancer = sklearn.datasets.load_breast_cancer()
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(cancer.data, cancer.target, train_size=0.75, test_size=0.25, random_state=0)
clf = sklearn.linear_model.LogisticRegression()
clf_params = {'C': numpy.logspace(-8, 10, 19, base=2), 'fit_intercept': [1, 0]}
sa = simulated_annealing.optimize.SimulatedAnneal(clf, clf_params, T=10.0, T_min=0.001, alpha=0.75, verbose=True, max_iter=0.25, n_trans=5, max_runtime=300, cv=3, scoring='f1_macro', refit=True)
sa.fit(X_train, y_train)
y_train_pred = sa.best_estimator_.predict(X_train)
y_test_pred = sa.best_estimator_.predict(X_test)
print("Best score & params: {:.4f} {}".format(sa.best_score_, sa.best_params_))
print("Accuracy train: {:.4f}".format(numpy.mean(y_train_pred == y_train)))
print("Accuracy test: {:.4f}".format(numpy.mean(y_test_pred == y_test)))
import sklearn.datasets
import sklearn.linear_model
import simulated_annealing.optimize
import numpy
cancer = sklearn.datasets.load_breast_cancer()
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(cancer.data, cancer.target, train_size=0.75, test_size=0.25, random_state=0)
clf = sklearn.linear_model.LogisticRegression()
clf_params = {'C': numpy.logspace(-8, 10, 19, base=2), 'fit_intercept': [1, 0]}
sa = simulated_annealing.optimize.SimulatedAnneal(clf, clf_params, T=10.0, T_min=0.001, alpha=0.75, verbose=True, max_iter=0.25, n_trans=5, max_runtime=300, cv=3, scoring='f1_macro', refit=True)
sa.fit(X_train, y_train)
y_train_pred = sa.best_estimator_.predict(X_train)
y_test_pred = sa.best_estimator_.predict(X_test)
print("Best score & params: {:.4f} {}".format(sa.best_score_, sa.best_params_))
print("Accuracy train: {:.4f}".format(numpy.mean(y_train_pred == y_train)))
print("Accuracy test: {:.4f}".format(numpy.mean(y_test_pred == y_test)))
<결과값>
(생략)
41 T: 1.33484, score: 0.931437, std: 0.021011, params: {'C': 0.5,
'fit_intercept': 1}
Best score & params: 0.9497 {'C': 512.0, 'fit_intercept': 1}
Accuracy train: 0.9718
Accuracy test: 0.9650
3-1) Simulated Annealing for Decision Tree with IRIS data
import sklearn.model_selection
import sklearn.datasets
import sklearn.tree
import simulated_annealing.optimize
import numpy
iris = sklearn.datasets.load_iris()
X_train, X_test, y_train, y_test =
sklearn.model_selection.train_test_split(iris['data'], iris['target'],
train_size=0.75, test_size=0.25, random_state=0)
clf = sklearn.tree.DecisionTreeClassifier(random_state=0)
clf_params = {'max_depth': numpy.arange(1, 20)}
sa = simulated_annealing.optimize.SimulatedAnneal(clf, clf_params,
T=10.0, T_min=0.001, alpha=0.75, verbose=True, max_iter=0.25, n_trans=5,
max_runtime=300, cv=3, scoring='f1_macro', refit=True)
sa.fit(X_train, y_train)
y_train_pred = sa.best_estimator_.predict(X_train)
y_test_pred = sa.best_estimator_.predict(X_test)
print("Best score & params: {:.4f}
{}".format(sa.best_score_, sa.best_params_))
print("Accuracy train: {:.4f}".format(numpy.mean(y_train_pred
== y_train)))
print("Accuracy test: {:.4f}".format(numpy.mean(y_test_pred
== y_test)))
<결과값>
(생략)
37 T: 1.33484, score: 0.966850, std: 0.023634, params: {'max_depth':
11}
38 T: 1.33484, score: 0.951211, std: 0.038087, params: {'max_depth': 2}
39 T: 1.33484, score: 0.966850, std: 0.023634, params: {'max_depth': 8}
40 T: 1.33484, score: 0.966850, std: 0.023634, params: {'max_depth': 5}
41 T: 1.33484, score: 0.951211, std: 0.038087, params: {'max_depth': 2}
Best score & params: 0.9668 {'max_depth': 10}
Accuracy train: 1.0000
Accuracy test: 0.9737
3-2) Simulated Annealing for Decision Tree with cancer data
import sklearn.model_selection
import sklearn.datasets
import sklearn.tree
import simulated_annealing.optimize
import numpy
cancer = sklearn.datasets.load_breast_cancer()
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(cancer.data, cancer.target, train_size=0.75, test_size=0.25, random_state=0)
clf = sklearn.tree.DecisionTreeClassifier(random_state=0)
clf_params = {'max_depth': numpy.arange(1, 50)}
sa = simulated_annealing.optimize.SimulatedAnneal(clf, clf_params, T=10.0, T_min=0.001, alpha=0.75, verbose=True, max_iter=0.25, n_trans=5, max_runtime=300, cv=3, scoring='f1_macro', refit=True)
sa.fit(X_train, y_train)
y_train_pred = sa.best_estimator_.predict(X_train)
y_test_pred = sa.best_estimator_.predict(X_test)
print("Best score & params: {:.4f} {}".format(sa.best_score_, sa.best_params_))
print("Accuracy train: {:.4f}".format(numpy.mean(y_train_pred == y_train)))
print("Accuracy test: {:.4f}".format(numpy.mean(y_test_pred == y_test)))
import sklearn.datasets
import sklearn.tree
import simulated_annealing.optimize
import numpy
cancer = sklearn.datasets.load_breast_cancer()
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(cancer.data, cancer.target, train_size=0.75, test_size=0.25, random_state=0)
clf = sklearn.tree.DecisionTreeClassifier(random_state=0)
clf_params = {'max_depth': numpy.arange(1, 50)}
sa = simulated_annealing.optimize.SimulatedAnneal(clf, clf_params, T=10.0, T_min=0.001, alpha=0.75, verbose=True, max_iter=0.25, n_trans=5, max_runtime=300, cv=3, scoring='f1_macro', refit=True)
sa.fit(X_train, y_train)
y_train_pred = sa.best_estimator_.predict(X_train)
y_test_pred = sa.best_estimator_.predict(X_test)
print("Best score & params: {:.4f} {}".format(sa.best_score_, sa.best_params_))
print("Accuracy train: {:.4f}".format(numpy.mean(y_train_pred == y_train)))
print("Accuracy test: {:.4f}".format(numpy.mean(y_test_pred == y_test)))
<결과값>
(생략)
41 T: 1.33484, score: 0.890819, std: 0.011933, params: {'max_depth':
30}
Best score & params: 0.8923 {'max_depth': 5}
Accuracy train: 0.9930
Accuracy test: 0.8951
댓글
댓글 쓰기