※ 김성훈 교수님의 [모두를 위한 딥러닝] 강의 정리
- https://www.youtube.com/watch?reload=9&v=BS6O0zOGX4E&feature=youtu.be&list=PLlMkM4tgfjnLSOjrEJN31gZATbcj_MpUm&fbclid=IwAR07UnOxQEOxSKkH6bQ8PzYj2vDop_J0Pbzkg3IVQeQ_zTKcXdNOwaSf_k0
- 참고자료 : Andrew Ng's ML class
1) https://class.coursera.org/ml-003/lecture
2) http://holehouse.org/mlclass/ (note)
1. Loading data from file
# Multi-variable linear regression; training data is read from a CSV file
# with NumPy and fed through placeholders (TensorFlow 1.x graph-mode API).
import tensorflow as tf
import numpy as np

tf.set_random_seed(777)  # for reproducibility

dataset = np.loadtxt('data-01-test-score.csv', delimiter=',', dtype=np.float32)
features = dataset[:, 0:-1]   # all columns except the last -> model inputs
labels = dataset[:, [-1]]     # last column, kept 2-D -> regression targets

# Make sure the shape and data are OK
print(features, "\nx_data shape:", features.shape)
print(labels, "\ny_data shape:", labels.shape)

# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis: a single affine map, XW + b
hypothesis = tf.matmul(X, W) + b

# Mean-squared-error cost
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize the cost with plain gradient descent
train = tf.train.GradientDescentOptimizer(learning_rate=1e-5).minimize(cost)

# Launch the graph in a session and initialize all variables.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

for step in range(2001):
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: features, Y: labels})
    if step % 10 == 0:
        print(step, "Cost:", cost_val, "\nPrediction:\n", hy_val)
1950, Cost: 2.8077145
Prediction:
array([[154.30186],
[183.31505],
[181.97646],
[194.59978],
[142.33385],
[ 99.34767]], dtype=float32))
1960, Cost: 2.7977974
Prediction:
array([[154.296 ],
[183.31776],
[181.97401],
[194.59859],
[142.33716],
[ 99.35353]], dtype=float32))
1970, Cost: 2.787885
Prediction:
array([[154.29016],
[183.32051],
[181.97154],
[194.5974 ],
[142.34042],
[ 99.35938]], dtype=float32))
1980, Cost: 2.778064
Prediction:
array([[154.28435],
[183.32324],
[181.9691 ],
[194.59624],
[142.3437 ],
[ 99.3652 ]], dtype=float32))
1990, Cost: 2.7683241
Prediction:
array([[154.27856],
[183.32594],
[181.96667],
[194.59506],
[142.34695],
[ 99.37102]], dtype=float32))
2000, Cost: 2.7586195
Prediction:
array([[154.27278 ],
[183.32866 ],
[181.96426 ],
[194.5939 ],
[142.35019 ],
[ 99.376816]], dtype=float32))
2. Loading data from multiple files with an input queue
# Same multi-variable linear regression, but the CSV is streamed through
# TensorFlow 1.x's queue-based input pipeline (filename queue -> line reader
# -> CSV decoder -> batcher) instead of being loaded whole with NumPy.
import tensorflow as tf
tf.set_random_seed(777)  # for reproducibility

# Queue of input file names; shuffle=False keeps rows in file order,
# and the queue cycles over the file list indefinitely.
filename_queue = tf.train.string_input_producer(
    ['data-01-test-score.csv'], shuffle=False, name='filename_queue')

# Reads one line of text per call from the file at the head of the queue.
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

# Default values, in case of empty columns. Also specifies the type of the
# decoded result (four float32 columns here).
record_defaults = [[0.], [0.], [0.], [0.]]
xy = tf.decode_csv(value, record_defaults=record_defaults)

# Collect decoded rows into batches of 10:
# first three columns -> x, last column -> y.
train_x_batch, train_y_batch = \
    tf.train.batch([xy[0:-1], xy[-1:]], batch_size=10)

# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis: a single affine map, XW + b
hypothesis = tf.matmul(X, W) + b

# Mean-squared-error cost
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize the cost with plain gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())

# Start populating the filename queue. The queue-runner threads must be
# running before any sess.run() on the batch tensors, or it will block.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

for step in range(2001):
    # Pull the next batch of 10 rows from the pipeline, then train on it.
    x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_batch, Y: y_batch})
    if step % 10 == 0:
        print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)

# Stop the reader threads and wait for them to finish.
coord.request_stop()
coord.join(threads)

# Ask my score: predict for new, unseen inputs with the trained weights.
print("Your score will be ",
      sess.run(hypothesis, feed_dict={X: [[100, 70, 101]]}))

print("Other scores will be ",
      sess.run(hypothesis, feed_dict={X: [[60, 70, 110], [90, 100, 80]]}))
1980, Cost: 2.2382462
Prediction:
array([[152.35132],
[183.37514],
[180.53424],
[197.20535],
[139.35315],
[103.52445],
[152.35132],
[183.37514],
[180.53424],
[197.20535]], dtype=float32))
1990, Cost: 3.407795
Prediction:
array([[139.34067],
[103.51443],
[152.33455],
[183.35727],
[180.5155 ],
[197.18425],
[139.34067],
[103.51443],
[152.33455],
[183.35727]], dtype=float32))
2000, Cost: 3.3214183
Prediction:
array([[180.62273],
[197.30028],
[139.42564],
[103.57615],
[152.42416],
[183.46718],
[180.62273],
[197.30028],
[139.42564],
[103.57615]], dtype=float32))
Your score will be  [[182.8681]]
Other scores will be  [[169.80573]
 [177.92252]]