github地址
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
/anaconda3/envs/py35/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
  return f(*args, **kwds)
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
plt.style.use('ggplot')
data = pd.read_csv('Advertising.csv')
data.head()
| | Unnamed: 0 | TV | radio | newspaper | sales |
---|---|---|---|---|---|
0 | 1 | 230.1 | 37.8 | 69.2 | 22.1 |
1 | 2 | 44.5 | 39.3 | 45.1 | 10.4 |
2 | 3 | 17.2 | 45.9 | 69.3 | 9.3 |
3 | 4 | 151.5 | 41.3 | 58.5 | 18.5 |
4 | 5 | 180.8 | 10.8 | 58.4 | 12.9 |
plt.scatter(data.TV, data.sales)

plt.scatter(data.radio, data.sales)

plt.scatter(data.newspaper, data.sales)

x = data[['TV', 'radio', 'newspaper']]
y = data.sales
x_train, x_test, y_train, y_test = train_test_split(x, y)
len(x_train), len(y_train)
(150, 150)
len(x_test)
50
model = LinearRegression()
model.fit(x_train, y_train)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
model.coef_
array([ 0.04466416, 0.19594144, -0.00469486])
for i in zip(x_train.columns, model.coef_):
    print(i)
('TV', 0.04466415613441986)
('radio', 0.1959414384329583)
('newspaper', -0.0046948632484331895)
mean_squared_error(model.predict(x_test), y_test)
3.927556655626268