Python Libraries for Data Science, esp. for NLP (Natural Language Processing)

For NLP tasks, you will either come across these Python libraries or have to use many of them.

# Python 2/3 compatibility shims (needed only when running under Python 2)
from __future__ import print_function, unicode_literals, division

# Standard library
import copy
import csv
import itertools
import math
import operator
import os.path
import random
import re
import string
from collections import Counter, defaultdict
from time import sleep                 # to avoid overwhelming a server between requests
from six.moves import map, zip        # Python 2/3 compatibility

# Numerics, data frames, plotting
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from numpy import array, asarray, zeros
# %matplotlib inline                  # Jupyter magic; uncomment inside a notebook

# NLTK and its data packages
import nltk
nltk.download("punkt")                       # tokenizer models
nltk.download("stopwords")                   # stop-word lists
nltk.download("gutenberg")                   # Project Gutenberg corpus
nltk.download("treebank")                    # Penn Treebank sample
nltk.download("brown")                       # Brown corpus
nltk.download("averaged_perceptron_tagger")  # default POS tagger
nltk.download("tagsets")                     # POS tag-set documentation
nltk.download("universal_tagset")            # universal POS tag mapping
nltk.help.upenn_tagset("JJS")                # look up a Penn Treebank tag

# NLTK: tokenizers, corpora, stemming/lemmatization, n-grams, tagging, metrics
from nltk.tokenize import (
    TreebankWordTokenizer,
    WordPunctTokenizer,
    RegexpTokenizer,
    sent_tokenize,
    word_tokenize,
)
from nltk.corpus import stopwords, gutenberg, treebank, wordnet
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.util import ngrams, LazyMap, unique_list
from nltk.collocations import *
from nltk.probability import (
    FreqDist,
    ConditionalFreqDist,
    ConditionalProbDist,
    DictionaryProbDist,
    DictionaryConditionalProbDist,
    LaplaceProbDist,
    LidstoneProbDist,
    MLEProbDist,
    MutableProbDist,
    RandomProbDist,
)
from nltk.metrics import accuracy, BigramAssocMeasures, TrigramAssocMeasures
from nltk.classify import maxent
from nltk.tag import StanfordNERTagger
from nltk.tag.api import TaggerI
from nltk.compat import python_2_unicode_compatible  # removed in newer NLTK releases

# scikit-learn: datasets, feature extraction, models, evaluation
from sklearn import datasets
from sklearn.datasets import fetch_20newsgroups, load_files
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation, TruncatedSVD
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import precision_recall_fscore_support

# TensorFlow / Keras (Keras 2.x-era paths; newer code imports from tensorflow.keras)
import tensorflow as tf
import keras
from keras import models, layers, optimizers
from keras.models import Sequential, Model, Input
from keras.layers import (
    Activation,
    Bidirectional,
    Dense,
    Dropout,
    Embedding,
    Flatten,
    LSTM,
    TimeDistributed,
)
from keras.layers.merge import add           # plain keras.layers.add in newer Keras
from keras.preprocessing.text import Tokenizer, one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

# gensim text summarization (this module was removed in gensim 4.x)
from gensim.summarization import summarize, keywords

# seqeval: evaluation metrics for sequence labelling (e.g., NER)
from seqeval.metrics import precision_score, recall_score, f1_score, classification_report

# Web scraping
from bs4 import BeautifulSoup                # HTML parsing
import urllib                                # website connections
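As a quick taste of the most frequently used pieces above, here is a minimal sketch (the sample sentence and variable names are my own, for illustration) that tokenizes text and filters out English stop words with NLTK:

import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

nltk.download("punkt")       # tokenizer models (recent NLTK may also need "punkt_tab")
nltk.download("stopwords")   # stop-word lists

text = "Natural language processing helps computers understand human language."
tokens = word_tokenize(text.lower())            # split into word tokens
stop_words = set(stopwords.words("english"))    # standard English stop-word list
content = [t for t in tokens if t.isalpha() and t not in stop_words]
print(content)  # ['natural', 'language', 'processing', 'helps', 'computers', 'understand', 'human', 'language']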


Misc. Plots

[Several sets of plots (including "Misc Plots – 3" and "Misc. Plot – 4") appeared here; the plot images are not reproduced.]

Ref: https://www.wolframalpha.com/





Euclidean Norm of a Matrix
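For an m × n matrix A, the Euclidean norm (also called the Frobenius norm, per the MathWorld reference below) is the square root of the sum of the squared absolute values of its entries. In LaTeX:

\[
\|A\|_F \;=\; \sqrt{\sum_{i=1}^{m} \sum_{j=1}^{n} |a_{ij}|^2} \;=\; \sqrt{\operatorname{trace}\!\left(A^{\mathsf{H}} A\right)}
\]

where $A^{\mathsf{H}}$ denotes the conjugate transpose of $A$ (the plain transpose for real matrices).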

Ref: http://mathworld.wolfram.com/FrobeniusNorm.html


Misc. Data Science: Clustering

"Modelbased clustering assumes that the data were generated by a model and tries to recover the original model from the data. The model that we recover from the data then defines clusters and an assignment of documents to clusters. A commonly used criterion for estimating the model parameters is maximum likelihood.nlp.stanford.edu › IR-book › html › htmledition › model-based-clusteri…

Model-based clustering – Stanford NLP Group

"

"
Mixture models are also known as modelbased clustering. Modelbased clustering is a broad family of algorithms designed for modelling an unknown distribution as a mixture of simpler distributions, sometimes called basis distributions.

www.sciencedirect.com › topics › medicine-and-dentistry › model-based…

Model-Based Clustering – an overview | ScienceDirect Topics

"

"
Mixture model

Description

In statistics, a mixture model is a probabilistic model for representing the presence of subpopulations within an overall population, without requiring that an observed data set should identify the sub-population to which an individual observation belongs. Wikipedia

"

Introduction to Mixture Models
https://stephens999.github.io/fiveMinuteStats/intro_to_mixture_models.html
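To make this concrete, here is a minimal model-based clustering sketch with scikit-learn's GaussianMixture (the synthetic two-cluster data and all parameter choices are mine, not from the quoted sources):

import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.default_rng(0)
# Synthetic data: two Gaussian subpopulations mixed together
X = np.vstack([
    rng.normal(loc=0.0, scale=1.0, size=(100, 2)),
    rng.normal(loc=5.0, scale=1.0, size=(100, 2)),
])

gmm = GaussianMixture(n_components=2, random_state=0).fit(X)
labels = gmm.predict(X)        # hard cluster assignment per point
probs = gmm.predict_proba(X)   # soft (probabilistic) memberships per subpopulation
print(gmm.means_)              # recovered component means, near (0, 0) and (5, 5)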


Misc. Optimization Resources

L0 Norm, L1 Norm, L2 Norm & L-Infinity Norm

https://medium.com/@montjoile/l0-norm-l1-norm-l2-norm-l-infinity-norm-7a7d18a4f40c
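A small NumPy sketch of the norms discussed in the linked article (the example vector is mine; note that the "L0 norm" is not a true norm, it just counts non-zero entries):

import numpy as np

x = np.array([3.0, -4.0, 0.0])

l0 = np.count_nonzero(x)               # "L0 norm": number of non-zero entries -> 2
l1 = np.linalg.norm(x, ord=1)          # L1: sum of absolute values -> 7.0
l2 = np.linalg.norm(x, ord=2)          # L2 (Euclidean): sqrt(9 + 16) -> 5.0
linf = np.linalg.norm(x, ord=np.inf)   # L-infinity: max absolute value -> 4.0
print(l0, l1, l2, linf)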

***

Iterative Solutions of Linear Systems

https://www.math.uh.edu/~jingqiu/math4364/iterative_linear_system.pdf
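As a taste of the linked notes, here is a minimal sketch of one classic iterative scheme, the Jacobi method (the toy system below is my own; the iteration converges for strictly diagonally dominant matrices):

import numpy as np

def jacobi(A, b, x0=None, tol=1e-10, max_iter=500):
    """Solve Ax = b by Jacobi iteration."""
    n = len(b)
    x = np.zeros(n) if x0 is None else x0.astype(float)
    D = np.diag(A)                 # diagonal entries of A
    R = A - np.diagflat(D)         # off-diagonal part of A
    for _ in range(max_iter):
        x_new = (b - R @ x) / D    # x_{k+1} = D^{-1} (b - R x_k)
        if np.linalg.norm(x_new - x, ord=np.inf) < tol:
            return x_new
        x = x_new
    return x

A = np.array([[4.0, 1.0], [2.0, 5.0]])  # strictly diagonally dominant test matrix
b = np.array([1.0, 2.0])
print(jacobi(A, b))                     # agrees with np.linalg.solve(A, b)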

***

How statistical Norms improve modeling
https://towardsdatascience.com/norms-penalties-and-multitask-learning-2f1db5f97c1f

Optimization project examples:
http://www.cs.cmu.edu/~aarti/Class/10725_Fall17/past_projects.html

https://web.stanford.edu/class/ee392o/#projects

https://ece.uwaterloo.ca/~ece602/Projects/2017/Project21/main.html

Research area and project example:
http://www.ece.tufts.edu/ee/194CO/project_14.pdf

Sensors and optimization (could be a good read):
http://homepages.rpi.edu/~mitchj/phdtheses/daryn/ramsdd.pdf


Part 1: Bootstrapping, Bagging, Random Forests

What is a Classification Tree?

"A classification tree labels, records, and assigns variables to discrete classes. A classification tree can also provide a measure of confidence that the classification is correct. A classification tree is built through a process known as binary recursive partitioning."

— "Classification Tree," solver (www.solver.com)

Pros and Cons of Classification Trees

Advantages:

  1. Require relatively little effort for data preparation
  2. Normalization of the data is not required
  3. Scaling of the data is not required
  4. Missing values do not affect the tree-building process much
  5. Easy to explain

Disadvantages:

  1. A small change in the data can cause a large change in the tree
  2. Calculations can sometimes become far more complex
  3. Training can take more time
  4. Relatively expensive to train

https://medium.com/@dhiraj8899/top-5-advantages-and-disadvantages-of-decision-tree-algorithm-428ebd199d9a
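A minimal classification-tree sketch with scikit-learn (the dataset and hyperparameters are my own choices for illustration): DecisionTreeClassifier performs the binary recursive partitioning described above, and predict_proba gives the per-class confidence mentioned in the quoted definition.

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Each node splits on the feature/threshold that best separates the classes
tree = DecisionTreeClassifier(max_depth=3, random_state=0).fit(X_train, y_train)
print(tree.score(X_test, y_test))        # accuracy on held-out data
print(tree.predict_proba(X_test[:3]))    # per-class confidence for three samples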

What is Ensemble Learning?

"In statistics and machine learning, ensemble methods use multiple learning algorithms to obtain better predictive performance than could be obtained from any of the constituent learning algorithms alone." Wikipedia

blog.statsbot.co › ensemble-learning-d1dcd548e936

Ensemble Learning to Improve Machine Learning Results

"Aug 22, 2017 – Ensemble methods are meta-algorithms that combine several machine learning techniques into one predictive model in order to decrease variance (bagging), bias (boosting), or improve predictions (stacking)."

What is Bootstrapping?

"In statistics, bootstrapping is any test or metric that relies on random sampling with replacement. Bootstrapping allows assigning measures of accuracy (defined in terms of bias, variance, confidence intervals, prediction error or some other such measure) to sample estimates."

— "Bootstrapping (statistics)," Wikipedia (en.wikipedia.org)
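A small NumPy sketch of the bootstrap idea (the data are synthetic and the settings are my own): resample with replacement many times and use the spread of the recomputed statistic as a measure of its accuracy.

import numpy as np

rng = np.random.default_rng(0)
data = rng.normal(loc=10.0, scale=2.0, size=50)   # one observed sample

# Resample with replacement many times and recompute the mean each time
boot_means = np.array([
    rng.choice(data, size=len(data), replace=True).mean()
    for _ in range(10_000)
])

print(data.mean())                              # point estimate of the mean
print(np.percentile(boot_means, [2.5, 97.5]))   # bootstrap 95% CI for the mean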

Bagging Steps:

"Suppose there are N observations and M features in the training data set. A sample from the training data set is taken randomly with replacement. A subset of the M features is selected randomly, and whichever feature gives the best split is used to split the node iteratively. The tree is grown to the largest."

— "Bagging and Boosting," Analytics India Magazine (analyticsindiamag.com, Feb 19, 2018)
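The recipe quoted above (bootstrap samples of the N observations plus a random subset of the M features at each split) is essentially what scikit-learn's RandomForestClassifier implements. A minimal sketch (dataset and settings are mine):

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

forest = RandomForestClassifier(
    n_estimators=100,      # number of bootstrapped trees
    max_features="sqrt",   # random feature subset considered at each split
    random_state=0,
).fit(X_train, y_train)

print(forest.score(X_test, y_test))   # averaging many trees reduces variance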


KL Divergence, Entropy, Cross Entropy: Equations and Example Use Cases

KL Divergence in Pictures and Examples

“Kullback–Leibler divergence is the difference between the cross entropy H(P, Q) and the true entropy H(P).”

[Figures from [1] illustrating entropy, cross entropy, and KL divergence are not reproduced here.]

“And this is what we use as a loss function while training Neural Networks. When we have an image classification problem, the training data and corresponding correct labels represent P, the true distribution. The NN predictions are our estimations Q.”

Reference for the above (including images): https://towardsdatascience.com/entropy-cross-entropy-kl-divergence-binary-cross-entropy-cb8f72e72e65
The above URL is a pretty great read.

****
Everything below is from the Internet, including images and equations, esp. from [1].

What’s the KL Divergence?

“The Kullback-Leibler divergence (hereafter written as KL divergence) is a measure of how a probability distribution differs from another probability distribution. The KL divergence measures the distance from the approximate distribution Q to the true distribution P.” [1]

KL divergence from Q to P [1]:

\[
D_{KL}(P \,\|\, Q) \;=\; \mathbb{E}_{x \sim P}\!\left[\log \frac{P(x)}{Q(x)}\right] \;=\; \sum_x P(x)\, \log \frac{P(x)}{Q(x)}
\]

Note that KL divergence is not a distance metric: it is not symmetric, i.e. in general $D_{KL}(P \,\|\, Q) \neq D_{KL}(Q \,\|\, P)$.

It can be written as [1]:

\[
D_{KL}(P \,\|\, Q) \;=\; \underbrace{-\sum_x P(x)\, \log Q(x)}_{\text{cross entropy } H(P,\,Q)} \;-\; \underbrace{\Big(-\sum_x P(x)\, \log P(x)\Big)}_{\text{entropy } H(P)}
\]

The first term is the cross entropy between P and Q; the second term is the entropy of P.
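A tiny NumPy check of this decomposition on two toy discrete distributions (the numbers are my own example):

import numpy as np

P = np.array([0.4, 0.4, 0.2])   # true distribution
Q = np.array([0.5, 0.3, 0.2])   # approximating distribution

entropy_P = -np.sum(P * np.log(P))       # H(P)
cross_entropy = -np.sum(P * np.log(Q))   # H(P, Q)
kl = np.sum(P * np.log(P / Q))           # D_KL(P || Q)

print(np.isclose(kl, cross_entropy - entropy_P))   # True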

Forward and Reverse KL

Forward KL, $D_{KL}(P \,\|\, Q)$: mean-seeking behaviour. Wherever P(x) has high probability, Q(x) must also have high probability, so the fitted Q spreads to cover all of P's mass. If P has two peaks, Q roughly averages over them, settling around the mean. [1]

Reverse KL, $D_{KL}(Q \,\|\, P)$: mode-seeking behaviour. Wherever Q(x) has high probability, P(x) must also have high probability, so the fitted Q concentrates on a single mode of P. [1]

References:
[1] https://dibyaghosh.com/blog/probability/kldivergence.html
[2] https://towardsdatascience.com/light-on-math-machine-learning-intuitive-guide-to-understanding-kl-divergence-2b382ca2b2a8

*** ***

“What is KL divergence used for? Very often in probability and statistics we’ll replace observed data or a complex distribution with a simpler, approximating distribution. KL divergence helps us to measure just how much information we lose when we choose an approximation.”

— “Kullback-Leibler Divergence Explained,” Count Bayesie (www.countbayesie.com, May 10, 2017)

***

Note: Older short notes from this site are posted on Medium: https://medium.com/@SayedAhmedCanada

Sayed Ahmed

BSc. Eng. in Comp. Sc. & Eng. (BUET)
MSc. in Comp. Sc. (U of Manitoba, Canada)
MSc. in Data Science and Analytics (Ryerson University, Canada)
LinkedIn: https://ca.linkedin.com/in/sayedjustetc

Blog: http://Bangla.SaLearningSchool.com, http://SitesTree.com
Online and offline training: http://Training.SitesTree.com (sometimes free or low-cost)

Facebook group/forum for discussion (Q & A): https://www.facebook.com/banglasalearningschool

Our free or paid training events: https://www.facebook.com/justetcsocial

Get access to courses on Big Data, Data Science, AI, Cloud, Linux, System Admin, Web Development, and related topics, or create your own course to sell to others: http://sitestree.com/training/