{"id":16923,"date":"2020-02-14T19:34:55","date_gmt":"2020-02-15T00:34:55","guid":{"rendered":"http:\/\/bangla.salearningschool.com\/recent-posts\/python-libraries-for-data-science-esp-for-nlp-natural-language-processing\/"},"modified":"2020-02-14T19:34:55","modified_gmt":"2020-02-15T00:34:55","slug":"python-libraries-for-data-science-esp-for-nlp-natural-language-processing","status":"publish","type":"post","link":"http:\/\/bangla.sitestree.com\/?p=16923","title":{"rendered":"Python Libraries for Data Science esp. for NLP &#8211; Natural Language Processing"},"content":{"rendered":"<p>For NLP tasks, either you will come across these libraries or you will have to use many of these Python libraries.<\/p>\n<p>import nltk<br \/>\n# tokenizer<br \/>\nnltk.download(&quot;punkt&quot;)<\/p>\n<p># stop words<br \/>\nnltk.download(&quot;stopwords&quot;)<br \/>\nfrom nltk.tokenize import TreebankWordTokenizer<br \/>\nfrom nltk.tokenize import WordPunctTokenizer<br \/>\nfrom nltk.tokenize import RegexpTokenizer<br \/>\nfrom nltk.tokenize import sent_tokenize<br \/>\nfrom nltk.corpus import stopwords<br \/>\nimport os.path<br \/>\nimport re<\/p>\n<p>from nltk.tokenize import RegexpTokenizer<br \/>\nfrom nltk.corpus import stopwords<br \/>\nfrom nltk.tokenize import RegexpTokenizer<br \/>\nimport nltk<br \/>\nfrom nltk.tokenize import TreebankWordTokenizer<br \/>\nfrom nltk.tokenize import WordPunctTokenizer<br \/>\nfrom nltk.tokenize import word_tokenize<br \/>\nfrom nltk.corpus import stopwords<br \/>\nimport nltk<br \/>\nfrom nltk.util import ngrams<br \/>\nfrom collections import Counter<br \/>\nfrom nltk.collocations import *<br \/>\nfrom nltk.tokenize import word_tokenize<br \/>\nfrom nltk.probability import ConditionalFreqDist, FreqDist<br \/>\nfrom nltk.probability import ConditionalProbDist, LaplaceProbDist<br \/>\nfrom nltk.corpus import stopwords<br \/>\nfrom nltk.metrics import TrigramAssocMeasures<br \/>\nfrom nltk.tokenize import TreebankWordTokenizer<br \/>\n#from 
nltk.probability import *<br \/>\nimport math<br \/>\nfrom nltk.metrics import TrigramAssocMeasures<br \/>\nfrom nltk.metrics import BigramAssocMeasures<br \/>\nfrom nltk.metrics import BigramAssocMeasures<\/p>\n<p>import math<br \/>\nimport random<br \/>\nfrom collections import Counter, defaultdict<\/p>\n<p>import nltk<br \/>\nnltk.download(&quot;gutenberg&quot;)<\/p>\n<p>from nltk.corpus import gutenberg<br \/>\nfrom nltk.util import ngrams<\/p>\n<p>import csv<br \/>\nfrom numpy import array<br \/>\nfrom numpy import asarray<br \/>\nfrom numpy import zeros<br \/>\nfrom keras.preprocessing.text import Tokenizer<br \/>\nfrom keras.preprocessing.sequence import pad_sequences<br \/>\nfrom keras.models import Sequential<br \/>\nfrom keras.layers import Dense<br \/>\nfrom keras.layers import Flatten<br \/>\nfrom keras.layers import Embedding<br \/>\nimport matplotlib.pyplot as plt<\/p>\n<p>import nltk<\/p>\n<p>nltk.download(&#8216;averaged_perceptron_tagger&#8217;)<\/p>\n<p>import nltk<br \/>\nfrom nltk.corpus import treebank<\/p>\n<p>import nltk<br \/>\nfrom nltk.tag import StanfordNERTagger<br \/>\nfrom nltk.metrics.scores import accuracy<\/p>\n<p>import nltk<br \/>\nfrom nltk.corpus import treebank<br \/>\nfrom nltk.classify import maxent<\/p>\n<p>from __future__ import print_function, unicode_literals, division<\/p>\n<p>import re<br \/>\nimport itertools<\/p>\n<p>from six.moves import map, zip<\/p>\n<p>from nltk.probability import (<br \/>\nFreqDist,<br \/>\nConditionalFreqDist,<br \/>\nConditionalProbDist,<br \/>\nDictionaryProbDist,<br \/>\nDictionaryConditionalProbDist,<br \/>\nLidstoneProbDist,<br \/>\nMutableProbDist,<br \/>\nMLEProbDist,<br \/>\nRandomProbDist,<br \/>\n)<br \/>\nfrom nltk.metrics import accuracy<br \/>\nfrom nltk.util import LazyMap, unique_list<br \/>\nfrom nltk.compat import python_2_unicode_compatible<br \/>\nfrom nltk.tag.api import TaggerI<\/p>\n<p>import matplotlib as mpl<br \/>\nimport matplotlib.pyplot as plt<\/p>\n<p>import numpy as 
np<\/p>\n<p>from sklearn import datasets<br \/>\nfrom sklearn.mixture import GaussianMixture<br \/>\nfrom sklearn.model_selection import StratifiedKFold<\/p>\n<p>import tensorflow as tf<br \/>\nimport numpy as np<br \/>\nimport random<\/p>\n<p>import keras<br \/>\nfrom keras.layers import Dense<br \/>\nfrom keras import models<br \/>\nfrom keras import layers<br \/>\nfrom keras.layers import Activation, Dense<\/p>\n<p>from keras import optimizers<\/p>\n<p>from gensim.summarization import summarize<\/p>\n<p>from gensim.summarization import keywords<br \/>\nfrom sklearn.datasets import fetch_20newsgroups<br \/>\nfrom nltk.corpus import wordnet<br \/>\nfrom nltk.stem import WordNetLemmatizer<br \/>\nimport string<br \/>\nfrom sklearn.feature_extraction.text import CountVectorizer<br \/>\nfrom sklearn.decomposition import LatentDirichletAllocation<br \/>\nfrom sklearn.decomposition import TruncatedSVD<br \/>\nnltk.download(&quot;averaged_perceptron_tagger&quot;)<br \/>\nnltk.download(&#8216;tagsets&#8217;)<br \/>\nnltk.help.upenn_tagset(&quot;JJS&quot;)<br \/>\nnltk.download(&#8216;treebank&#8217;)<br \/>\nnltk.download(&#8216;brown&#8217;)<br \/>\nnltk.download(&#8216;universal_tagset&#8217;)<br \/>\nimport nltk<br \/>\nfrom nltk.corpus import treebank<br \/>\nimport nltk<br \/>\nfrom nltk.corpus import treebank<br \/>\nfrom bs4 import BeautifulSoup # For HTML parsing<br \/>\nimport urllib # Website connections<br \/>\nimport re # Regular expressions<br \/>\nfrom time import sleep # To prevent overwhelming the server between connections<br \/>\nfrom collections import Counter # Keep track of our term counts<br \/>\nfrom nltk.corpus import stopwords # Filter out stopwords, such as &#8216;the&#8217;, &#8216;or&#8217;, &#8216;and&#8217;<br \/>\nimport pandas as pd # For converting results to a dataframe and bar chart plots<br \/>\nimport numpy as np<br \/>\nimport copy<br \/>\n%matplotlib inline<br \/>\nfrom sklearn.mixture import GaussianMixture<br \/>\nfrom 
sklearn.feature_extraction.text import TfidfVectorizer<br \/>\nimport operator<\/p>\n<p>from sklearn.datasets import load_files<br \/>\nimport nltk<br \/>\nimport string<br \/>\nfrom sklearn.feature_extraction.text import CountVectorizer<br \/>\nfrom nltk.stem import PorterStemmer<br \/>\nfrom sklearn.naive_bayes import MultinomialNB<br \/>\nfrom sklearn.metrics import precision_recall_fscore_support<br \/>\nimport pandas as pd<br \/>\nimport nltk<br \/>\nfrom nltk.corpus import treebank<br \/>\nimport tensorflow as tf<br \/>\nimport keras<br \/>\nfrom keras.layers import Dense<br \/>\nfrom keras import models<br \/>\nfrom keras import Sequential<\/p>\n<p>from numpy import array<br \/>\nfrom keras.preprocessing.text import one_hot<br \/>\nfrom keras.preprocessing.sequence import pad_sequences<br \/>\nfrom keras.models import Sequential<br \/>\nfrom keras.layers import Dense<br \/>\nfrom keras.layers import Flatten<br \/>\nfrom keras.layers.embeddings import Embedding<br \/>\nfrom sklearn.datasets import load_files<br \/>\nimport nltk<br \/>\nimport string<br \/>\nfrom sklearn.feature_extraction.text import CountVectorizer<br \/>\nfrom nltk.stem import PorterStemmer<br \/>\nfrom keras import models<br \/>\nfrom numpy import array<br \/>\nfrom keras.preprocessing.text import one_hot<\/p>\n<p>from keras.models import Sequential<br \/>\nfrom keras.layers import Dense<br \/>\nfrom keras.layers import Flatten<br \/>\nfrom keras.layers.embeddings import Embedding<br \/>\nfrom keras.preprocessing.text import one_hot<\/p>\n<p>from numpy import array<br \/>\nfrom numpy import asarray<br \/>\nfrom numpy import zeros<br \/>\nimport pandas as pd<br \/>\nfrom keras.preprocessing.text import Tokenizer<br \/>\nfrom keras.preprocessing.sequence import pad_sequences<br \/>\nfrom keras.models import Sequential<br \/>\nfrom keras.layers import Dense<br \/>\nfrom keras.layers import Flatten<br \/>\nfrom keras.layers import Embedding<\/p>\n<p>from keras.utils import to_categorical<br 
\/>\nfrom sklearn.model_selection import train_test_split<\/p>\n<p>from keras.models import Model, Input<br \/>\nfrom keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional<br \/>\nfrom keras.layers.merge import add<\/p>\n<p>from seqeval.metrics import precision_score, recall_score, f1_score, classification_report<\/p>\n<p>from keras.models import Model, Input<br \/>\nfrom keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional<br \/>\nfrom keras.layers.merge import add<br \/>\nfrom keras import models, layers<\/p>\n<p>from numpy import zeros<\/p>\n<p>*** ***. ***<br \/>\n<em><strong>Note: Older short-notes from this site are posted on Medium: <\/strong><\/em><a href=\"https:\/\/medium.com\/@SayedAhmedCanada\">https:\/\/medium.com\/@SayedAhmedCanada<\/a><\/p>\n<p>*** . *** *** . *** . *** . ***<br \/>\n<em><\/em><br \/>\n<em><strong>Sayed Ahmed<\/strong><br \/>\n<\/em><br \/>\n<em><strong>BSc. Eng. in Comp. Sc. &amp; Eng. (BUET)<\/strong><\/em><br \/>\n<em><strong>MSc. in Comp. Sc. (U of Manitoba, Canada)<\/strong><\/em><br \/>\n<em><strong>MSc. 
in Data Science and Analytics (Ryerson University, Canada)<\/strong><\/em><br \/>\n<em><strong>Linkedin<\/strong>: <a href=\"https:\/\/ca.linkedin.com\/in\/sayedjustetc\">https:\/\/ca.linkedin.com\/in\/sayedjustetc<\/a><br \/>\n<\/em><\/p>\n<p><em><strong>Blog<\/strong>: <a href=\"http:\/\/bangla.salearningschool.com\/\">http:\/\/Bangla.SaLearningSchool.com<\/a>, <a href=\"http:\/\/sitestree.com\">http:\/\/SitesTree.com<\/a><\/em><br \/>\n<em><strong>Online and Offline Training<\/strong>: <a href=\"http:\/\/training.SitesTree.com\">http:\/\/Training.SitesTree.com<\/a> (Also, can be free and low cost sometimes)<\/em><\/p>\n<p><em>Facebook Group\/Forum to discuss (Q &amp; A): <\/em><a href=\"https:\/\/www.facebook.com\/banglasalearningschool\">https:\/\/www.facebook.com\/banglasalearningschool<\/a><\/p>\n<p>Our free or paid training events: <a href=\"https:\/\/www.facebook.com\/justetcsocial\">https:\/\/www.facebook.com\/justetcsocial<\/a><\/p>\n<p><em>Get access to courses on Big Data, Data Science, AI, Cloud, Linux, System Admin, Web Development and Misc. related. Also, create your own course to sell to others. <\/em><a href=\"http:\/\/sitestree.com\/training\/\">http:\/\/sitestree.com\/training\/<\/a><\/p>\n<p><em><strong>I<\/strong>f you want to contribute to occasional free and\/or low cost online\/offline training or charitable\/non-profit work in the education\/health\/social service sector, you can financially contribute to: safoundation at <a href=\"http:\/\/salearningschool.com\">salearningschool.com<\/a> using Paypal or Credit Card (on <\/em><a href=\"http:\/\/sitestree.com\/training\/enrol\/index.php?id=114\">http:\/\/sitestree.com\/training\/enrol\/index.php?id=114<\/a> <em>).<\/em><\/p>\n","protected":false},"excerpt":{"rendered":"<p>For NLP tasks, either you will come across these libraries or you will have to use many of these Python libraries. 
import nltk # tokenizer nltk.download(&quot;punkt&quot;) # stop words nltk.download(&quot;stopwords&quot;) from nltk.tokenize import TreebankWordTokenizer from nltk.tokenize import WordPunctTokenizer from nltk.tokenize import RegexpTokenizer from nltk.tokenize import sent_tokenize from nltk.corpus import stopwords import os.path import re from &hellip; <\/p>\n<p><a class=\"more-link btn\" href=\"http:\/\/bangla.sitestree.com\/?p=16923\">Continue reading<\/a><\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[182],"tags":[],"class_list":["post-16923","post","type-post","status-publish","format-standard","hentry","category---blog","item-wrap"],"jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"jetpack-related-posts":[{"id":76086,"url":"http:\/\/bangla.sitestree.com\/?p=76086","url_meta":{"origin":16923,"position":0},"title":"Library Import in Python for ML\/Graph ML","author":"Sayed","date":"May 19, 2024","format":false,"excerpt":"import libraries import os import pandas as pd import math Import Libraries for Graph, GNN (Graph Neural Network), and GCN (Graph Convolutional Network) import stellargraph as sg from stellargraph import StellarGraph from stellargraph.layer import DeepGraphCNN from stellargraph.mapper import FullBatchNodeGenerator from stellargraph.mapper import PaddedGraphGenerator from stellargraph.layer import GCN Machine Learning related\u2026","rel":"","context":"In &quot;\u09ac\u09cd\u09b2\u0997 \u0964 Blog&quot;","block_context":{"text":"\u09ac\u09cd\u09b2\u0997 \u0964 Blog","link":"http:\/\/bangla.sitestree.com\/?cat=182"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":76436,"url":"http:\/\/bangla.sitestree.com\/?p=76436","url_meta":{"origin":16923,"position":1},"title":"1. 
Libraries used for the project: Predict Future Stock Price using Graph Theory, Machine Learning and Deep Learning)","author":"Sayed","date":"December 4, 2024","format":false,"excerpt":"#import libraries import osimport pandas as pdimport math #Import Libraries for Graph, GNN, and GCN import stellargraph as sgfrom stellargraph import StellarGraphfrom stellargraph.layer import DeepGraphCNNfrom stellargraph.mapper import FullBatchNodeGeneratorfrom stellargraph.mapper import PaddedGraphGeneratorfrom stellargraph.layer import GCN #Machine Learnig related library Imports from tensorflow.keras import layers, optimizers, losses, metrics, Modelfrom sklearn import preprocessing,\u2026","rel":"","context":"In &quot;Code: Predict Future Stock Price using Graph Theory, Machine Learning and Deep Learning)&quot;","block_context":{"text":"Code: Predict Future Stock Price using Graph Theory, Machine Learning and Deep Learning)","link":"http:\/\/bangla.sitestree.com\/?cat=1969"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":76075,"url":"http:\/\/bangla.sitestree.com\/?p=76075","url_meta":{"origin":16923,"position":2},"title":"K-Means Clustering","author":"Sayed","date":"May 18, 2024","format":false,"excerpt":"Click on the images to see them clearly #!\/usr\/bin\/env python coding: utf-8 In[1]: k-means clustering from numpy import unique from numpy import where from sklearn.datasets import make_classification from sklearn.cluster import KMeans from matplotlib import pyplot import numpy as np import pandas as pd import seaborn as sns import matplotlib.pyplot as\u2026","rel":"","context":"In &quot;\u09ac\u09cd\u09b2\u0997 \u0964 Blog&quot;","block_context":{"text":"\u09ac\u09cd\u09b2\u0997 \u0964 
Blog","link":"http:\/\/bangla.sitestree.com\/?cat=182"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/bangla.sitestree.com\/wp-content\/uploads\/2024\/05\/image-40.png?resize=350%2C200","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/bangla.sitestree.com\/wp-content\/uploads\/2024\/05\/image-40.png?resize=350%2C200 1x, https:\/\/i0.wp.com\/bangla.sitestree.com\/wp-content\/uploads\/2024\/05\/image-40.png?resize=525%2C300 1.5x"},"classes":[]},{"id":74659,"url":"http:\/\/bangla.sitestree.com\/?p=74659","url_meta":{"origin":16923,"position":3},"title":"Python and MongoDB Operations. Code Example","author":"Sayed","date":"May 17, 2022","format":false,"excerpt":"#!\/usr\/bin\/env python# coding: utf-8# In[63]: import pymongo; # In[64]: import pymongomyclient = pymongo.MongoClient(\"mongodb:\/\/localhost:27017\/\")mydb = myclient[\"mydatabase\"] # In[65]: # Check if database exist # In[66]: print(myclient.list_database_names()) # In[67]: dblist = myclient.list_database_names()if \"mydatabase\" in dblist:\u00a0 print(\"The database exists.\") # In[68]: # Create a collection # In[69]: import pymongomyclient = pymongo.MongoClient(\"mongodb:\/\/localhost:27017\/\")mydb = myclient[\"mydatabase\"]mycol\u2026","rel":"","context":"In &quot;Python&quot;","block_context":{"text":"Python","link":"http:\/\/bangla.sitestree.com\/?cat=1428"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":76580,"url":"http:\/\/bangla.sitestree.com\/?p=76580","url_meta":{"origin":16923,"position":4},"title":"3D Scatter Plot in Python","author":"Sayed","date":"January 12, 2025","format":false,"excerpt":"Visualizing 3-D numeric data with Scatter Plots length, breadth and depth Ref: https:\/\/towardsdatascience.com\/the-art-of-effective-visualization-of-multi-dimensional-data-6c7202990c57 import pandas as pdimport matplotlib.pyplot as pltfrom mpl_toolkits.mplot3d import Axes3Dimport matplotlib as mplimport numpy as npimport seaborn as sns%matplotlib inline fig = 
plt.figure(figsize=(8, 6))ax = fig.add_subplot(111, projection='3d') xs = wines['residual sugar']ys = wines['fixed acidity']zs = wines['alcohol']ax.scatter(xs, ys,\u2026","rel":"","context":"In &quot;Data Visualization&quot;","block_context":{"text":"Data Visualization","link":"http:\/\/bangla.sitestree.com\/?cat=1903"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/bangla.sitestree.com\/wp-content\/uploads\/2025\/01\/image-17.png?resize=350%2C200","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/bangla.sitestree.com\/wp-content\/uploads\/2025\/01\/image-17.png?resize=350%2C200 1x, https:\/\/i0.wp.com\/bangla.sitestree.com\/wp-content\/uploads\/2025\/01\/image-17.png?resize=525%2C300 1.5x, https:\/\/i0.wp.com\/bangla.sitestree.com\/wp-content\/uploads\/2025\/01\/image-17.png?resize=700%2C400 2x"},"classes":[]},{"id":76091,"url":"http:\/\/bangla.sitestree.com\/?p=76091","url_meta":{"origin":16923,"position":5},"title":"Spearman Correlation Coefficient and Graph Mining","author":"Sayed","date":"May 19, 2024","format":false,"excerpt":"#!\/usr\/bin\/env python coding: utf-8 # 3rd Model: Deepgraph CNN: Stock Price Prediction using DeepGraphCNN Neural Networks. It includes GCN layers and CNN layers. I have added an MLP at the last layer to predict stock prices. 
# # Input graphs were created for spearman, Spearman, and Kendal Tau correlations\/coefficients from\u2026","rel":"","context":"In &quot;\u09ac\u09cd\u09b2\u0997 \u0964 Blog&quot;","block_context":{"text":"\u09ac\u09cd\u09b2\u0997 \u0964 Blog","link":"http:\/\/bangla.sitestree.com\/?cat=182"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]}],"_links":{"self":[{"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=\/wp\/v2\/posts\/16923","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=16923"}],"version-history":[{"count":0,"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=\/wp\/v2\/posts\/16923\/revisions"}],"wp:attachment":[{"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=16923"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=16923"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=16923"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}