めも書き

Python練習帳

「機械学習 はじめよう」http://gihyo.jp/dev/serial/01/machine-learning/ より

第6回 Numpyの導入

In [1]: import numpy

In [2]: numpy.test()
Running unit tests for numpy
NumPy version 2.0.0.dev-3b3735d
NumPy is installed in /Library/Python/2.7/site-packages/numpy-2.0.0.dev_3b3735d_20111219-py2.7-macosx-10.7-x86_64.egg/numpy
Python version 2.7.1 (r271:86832, Jun 25 2011, 05:09:01) [GCC 4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2335.15.00)]
nose version 1.1.2
........(略)
----------------------------------------------------------------------
Ran 3422 tests in 18.146s

OK (KNOWNFAIL=3, SKIP=5)
Out[2]: <nose.result.TextTestResult run=3422 errors=0 failures=0>

In [1]: import numpy as np

In [2]: np.version.version
Out[2]: '2.0.0.dev-3b3735d'

In [3]: np.lookfor('array')

Search results for 'array'
--------------------------
numpy.array
    Create an array.
numpy.asarray
    Convert the input to an array.
numpy.ndarray
    ndarray(shape, dtype=float, buffer=None, offset=0,
...

array()

In [4]: a = np.array([1,2,3,4,5])

In [5]: b = np.array([[1.,0.,0.],[0.,1.,0.],[0.,0.,1.]])

In [6]: a
Out[6]: array([1, 2, 3, 4, 5])

In [7]: b
Out[7]: 
array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [8]: a.dtype
Out[8]: dtype('int64')

In [9]: b.dtype
Out[9]: dtype('float64')

In [10]: a = np.array([1,2,3,4,5],dtype=float)

In [11]: a
Out[11]: array([ 1.,  2.,  3.,  4.,  5.])

In [12]: a.dtype
Out[12]: dtype('float64')

In [13]: a = np.arange(0.0, 10.0, 0.1)

In [14]: a
Out[14]: 
array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ,
        1.1,  1.2,  1.3,  1.4,  1.5,  1.6,  1.7,  1.8,  1.9,  2. ,  2.1,
        2.2,  2.3,  2.4,  2.5,  2.6,  2.7,  2.8,  2.9,  3. ,  3.1,  3.2,
        3.3,  3.4,  3.5,  3.6,  3.7,  3.8,  3.9,  4. ,  4.1,  4.2,  4.3,
        4.4,  4.5,  4.6,  4.7,  4.8,  4.9,  5. ,  5.1,  5.2,  5.3,  5.4,
        5.5,  5.6,  5.7,  5.8,  5.9,  6. ,  6.1,  6.2,  6.3,  6.4,  6.5,
        6.6,  6.7,  6.8,  6.9,  7. ,  7.1,  7.2,  7.3,  7.4,  7.5,  7.6,
        7.7,  7.8,  7.9,  8. ,  8.1,  8.2,  8.3,  8.4,  8.5,  8.6,  8.7,
        8.8,  8.9,  9. ,  9.1,  9.2,  9.3,  9.4,  9.5,  9.6,  9.7,  9.8,
        9.9])

In [15]: a[20:40]
Out[15]: 
array([ 2. ,  2.1,  2.2,  2.3,  2.4,  2.5,  2.6,  2.7,  2.8,  2.9,  3. ,
        3.1,  3.2,  3.3,  3.4,  3.5,  3.6,  3.7,  3.8,  3.9])

In [16]: a[0:100:5]
Out[16]: 
array([ 0. ,  0.5,  1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ,
        5.5,  6. ,  6.5,  7. ,  7.5,  8. ,  8.5,  9. ,  9.5])

In [17]: a = np.arange(16)

In [18]: a
Out[18]: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [19]: a.reshape(4,4)
Out[19]: 
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [20]: a
Out[20]: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [21]: np.ravel(a)
Out[21]: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [22]: a
Out[22]: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [23]: a = np.array([[1,2,3]])

In [24]: a + 2
Out[24]: array([[3, 4, 5]])

In [25]: a * 2.0
Out[25]: array([[ 2.,  4.,  6.]])

In [26]: b = np.array([[4,5,6]])

In [27]: a + b
Out[27]: array([[5, 7, 9]])

In [28]: a * b
Out[28]: array([[ 4, 10, 18]])

統計関連の操作

In [1]: import numpy as np

In [2]: height = np.random.randint(140, 190, 100)

In [3]: height
Out[3]: 
array([182, 176, 189, 145, 184, 176, 160, 189, 151, 182, 155, 141, 156,
       187, 161, 145, 143, 182, 153, 140, 149, 184, 181, 152, 163, 167,
       149, 174, 167, 154, 169, 163, 177, 186, 154, 148, 152, 176, 188,
       184, 157, 168, 178, 180, 140, 142, 152, 161, 181, 151, 172, 174,
       170, 181, 184, 166, 163, 167, 170, 173, 173, 155, 142, 144, 148,
       168, 179, 180, 147, 143, 157, 182, 171, 158, 184, 180, 149, 143,
       168, 165, 145, 165, 158, 151, 173, 172, 173, 162, 166, 146, 160,
       157, 161, 167, 158, 159, 161, 152, 161, 153])

In [4]: np.mean(height)  ←平均値
Out[4]: 164.0

In [5]: np.median(height)  ←中央値
Out[5]: 163.0

In [6]: np.std(height)  ←標準偏差
Out[6]: 13.803622712896784

In [7]: np.sum(height)  ←総和
Out[7]: 16400

In [8]: np.amax(height)  ←最大値
Out[8]: 189

In [9]: np.amin(height)  ←最小値
Out[9]: 140

第7回 代表的な離散型確率分布

身長のヒストグラム

In [1]: import matplotlib.mlab as mlab

In [2]: import matplotlib.pyplot as plt

In [3]: import numpy as np

In [4]: sample = 1000

In [5]: mu, sigma = 170, 5

In [6]: data = np.random.normal(mu, sigma, sample)

In [7]: n, bins, patches = plt.hist(data, normed=1, alpha=0.75, align='mid')

f:id:to33k:20111226161117p:plain

In [8]: y = mlab.normpdf(bins, mu, sigma)

In [9]: l = plt.plot(bins, y, 'r-', linewidth=1)

f:id:to33k:20111226161145p:plain

In [10]: plt.title(r'$\mathrm{Histogram\ of\ Height:}\ \mu=%d,\ \sigma=%d$' % (mu, sigma))
Out[10]: <matplotlib.text.Text at 0x110695b50>

f:id:to33k:20111226161203p:plain

In [11]: plt.xlabel('Height')
Out[11]: <matplotlib.text.Text at 0x110686410>

f:id:to33k:20111226161244p:plain

In [12]: plt.ylabel('Probability')
Out[12]: <matplotlib.text.Text at 0x110689d90>

f:id:to33k:20111226161318p:plain

In [13]: plt.grid(True)

In [14]: plt.show()

f:id:to33k:20111226161346p:plain