Prepare data import time import numpy as np import matplotlib.pyplot as plt from collections import OrderedDict from pyspark import SparkContext from pyspark.mllib.classification import LogisticRegressionWithSGD from pyspark.mllib.tree import RandomForest from pyspark.mllib.classification import LabeledPoint %matplotlib inline sc = SparkContext() clean.csv is the original file with NaN values filled with the mean of the column. features = sc.textFile('/home/sergey/MachineLearning/biline/clean.csv') features = features.map(lambda…

  import numpy as np import matplotlib.pyplot as plt %matplotlib inline   from math import log def logloss(p, y): epsilon = 10e-12 if p == 0: p += epsilon if p == 1: p -= epsilon if y == 1: return -log(p) if y == 0: return -log(1-p) def evaluate_logloss(p,labels): return sum(list(map(lambda x: logloss(p,x), labels)))/len(labels)…

© 2014 In R we trust.
Top
Follow us: