{"id":19932,"date":"2021-02-07T22:49:34","date_gmt":"2021-02-08T03:49:34","guid":{"rendered":"http:\/\/bangla.salearningschool.com\/recent-posts\/?p=19932"},"modified":"2021-02-09T21:25:20","modified_gmt":"2021-02-10T02:25:20","slug":"understanding-indicators-used-to-measure-health-quality","status":"publish","type":"post","link":"http:\/\/bangla.sitestree.com\/?p=19932","title":{"rendered":"Understanding indicators used to measure health quality"},"content":{"rendered":"\n<figure class=\"wp-block-image\"><a href=\"https:\/\/medium.com\/@SayedAhmedCanada?source=post_page-----17683cc61553--------------------------------\"><img data-recalc-dims=\"1\" decoding=\"async\" src=\"https:\/\/i0.wp.com\/miro.medium.com\/fit\/c\/96\/96\/1%2AqIditOG5oO0RmWRR7Iw4bA.png?w=750&#038;ssl=1\" alt=\"Justetc Social Services (non-profit)\"\/><\/a><\/figure>\n\n\n\n<p class=\"wp-block-paragraph\"><a href=\"https:\/\/medium.com\/@SayedAhmedCanada?source=post_page-----17683cc61553--------------------------------\">Justetc Social Services (non-profit)<\/a><a href=\"https:\/\/medium.com\/health-system-performance\/understanding-indicators-used-to-measure-health-quality-17683cc61553?source=post_page-----17683cc61553--------------------------------\">Jan 31<\/a>&nbsp;\u00b7&nbsp;16&nbsp;min read<\/p>\n\n\n\n<p class=\"wp-block-paragraph\" id=\"5033\">This code works with the data in the Excel file: indicator-methodology.xls<\/p>\n\n\n\n<p class=\"wp-block-paragraph\" id=\"69cf\">Purpose:<\/p>\n\n\n\n<ol class=\"wp-block-list\"><li>Find out prominent indicators \u2014 this might also mean the critical aspects of health<\/li><li>Find out the measurements that are used to find the quality<\/li><\/ol>\n\n\n\n<p class=\"wp-block-paragraph\" id=\"9fe0\">Code Reference: This code heavily makes use of the code provided in the Text Visualization Lab. 
The methods might have been used as is, i.e., used as libraries<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\"># COMMENT IF PACKAGES ALREADY INSTALLED (if pip does not work use pip3)<br \/># !pip install nltk<br \/># !pip install wordcloud<br \/># !pip install pytagcloud<br \/># !pip install pygame<br \/># !pip install simplejson<br \/># !pip install bs4<br \/># !pip install networkx<br \/># !pip install gensim<br \/><br \/>import nltk<br \/>#nltk.download() ##choose stopwords to download<br \/>from nltk.corpus import stopwords<br \/>from nltk.util import ngrams<br \/>from bs4 import BeautifulSoup<br \/># import urllib2<br \/>import urllib.request as urllib2<br \/>import re<br \/>from collections import Counter<br \/>from wordcloud import WordCloud <br \/>import matplotlib.pyplot as plt<br \/>%matplotlib inline<br \/>from pytagcloud import create_tag_image,  make_tags, LAYOUT_MIX<br \/>import operator<br \/>from IPython.display import Image<br \/>import nltk.data<br \/>import networkx as nx<br \/>import sys<br \/>sys.setrecursionlimit(10000)<br \/>nltk.download('stopwords')<br \/>nltk.download('punkt')<br \/># nltk.download() ## download stopwords and punkt<br \/><br \/>import networkx as nx<br \/>import random<br \/><br \/>pygame 1.9.4<br \/>Hello from the pygame community. 
https:\/\/www.pygame.org\/contribute.html<br \/><br \/><br \/>[nltk_data] Downloading package stopwords to C:\\Users\\Sayed<br \/>[nltk_data]     Ahmed\\AppData\\Roaming\\nltk_data...<br \/>[nltk_data]   Package stopwords is already up-to-date!<br \/>[nltk_data] Downloading package punkt to C:\\Users\\Sayed<br \/>[nltk_data]     Ahmed\\AppData\\Roaming\\nltk_data...<br \/>[nltk_data]   Package punkt is already up-to-date!# https:\/\/www.kaggle.com\/c\/word2vec-nlp-tutorial\/details\/part-1-for-beginners-bag-of-words<br \/>def clean_text( raw_review ):<br \/>    # Function to convert a raw review to a string of words<br \/>    # The input is a single string (a raw movie review), and <br \/>    # the output is a single string (a preprocessed movie review)<br \/>    #<br \/>    # 1. Remove HTML<br \/>    review_text = BeautifulSoup(raw_review) <br \/>    #Remove javascript elements<br \/>    for script in review_text([\"script\", \"style\"]):<br \/>        script.extract()    # rip it out<br \/><br \/>    # get text<br \/>    review_text = review_text.get_text()<br \/>    # 2. Remove non-letters        <br \/>    letters_only = re.sub(\"[^a-zA-Z]\", \" \", review_text) <br \/>    #<br \/>    # 3. Convert to lower case, split into individual words<br \/>    words = letters_only.lower().split()                             <br \/>    #<br \/>    # 4. In Python, searching a set is much faster than searching<br \/>    #   a list, so convert the stop words to a set<br \/>    stops = set(stopwords.words(\"english\"))<br \/>    # <br \/>    # 5. Remove stop words<br \/>    meaningful_words = [w for w in words if not w in stops]   <br \/>    #<br \/>    # 6. 
Join the words back into one string separated by space, <br \/>    # and return the result.<br \/>    return( \" \".join( meaningful_words )) <br \/><br \/><br \/>def constructtree(lst,tree,factor, parent=None):<br \/>    if lst==[]:<br \/>        return {}<br \/>    else:<br \/>        word =lst[0]<br \/>        if parent:<br \/>            edges.append((parent, word))<br \/>        else:<br \/>            edges.append(('root', word))<br \/>        if not word in tree.keys():          #tree.has_key(word):   for python 2<br \/>            tree[word]={'name':word,'value':1\/factor,\"children\":{}}<br \/>            tree[word][\"children\"]=constructtree(lst[1:],tree[word][\"children\"],factor, word)<br \/>        else:<br \/>            #print 22<br \/>            tree[word][\"value\"]+=1\/factor<br \/>            tree[word][\"children\"]=constructtree(lst[1:],tree[word][\"children\"],factor, word)<br \/>        return tree<br \/><br \/>def doall(wl,tree):<br \/>    for x in wl:<br \/>#             print (11,x)  #<br \/>#             print()<br \/>            tree2=constructtree(x,tree,1)<br \/>            tree=tree2<br \/>#     print (1,tree)   #<br \/>#     print()<br \/>    return tree, edges<br \/><br \/><br \/><br \/>def hierarchy_pos(G, root=None, width=10., vert_gap = 0.2, vert_loc = 0, xcenter = 0.5):<br \/><br \/>    '''<br \/>    From Joel's answer at https:\/\/stackoverflow.com\/a\/29597209\/2966723 <br \/><br \/>    If the graph is a tree this will return the positions to plot this in a <br \/>    hierarchical layout.<br \/><br \/>    G: the graph (must be a tree)<br \/><br \/>    root: the root node of current branch <br \/>    - if the tree is directed and this is not given, the root will be found and used<br \/>    - if the tree is directed and this is given, then the positions will be just for the descendants of this node.<br \/>    - if the tree is undirected and not given, then a random choice will be used.<br \/><br \/>    width: horizontal space 
allocated for this branch - avoids overlap with other branches<br \/><br \/>    vert_gap: gap between levels of hierarchy<br \/><br \/>    vert_loc: vertical location of root<br \/><br \/>    xcenter: horizontal location of root<br \/>    '''<br \/>    if not nx.is_tree(G):<br \/>        raise TypeError('cannot use hierarchy_pos on a graph that is not a tree')<br \/><br \/>    if root is None:<br \/>        if isinstance(G, nx.DiGraph):<br \/>            root = next(iter(nx.topological_sort(G)))  #allows back compatibility with nx version 1.11<br \/>        else:<br \/>            root = random.choice(list(G.nodes))<br \/><br \/>    def _hierarchy_pos(G, root, width=1., vert_gap = 0.2, vert_loc = 0, xcenter = 0.5, pos = None, parent = None):<br \/>        '''<br \/>        see hierarchy_pos docstring for most arguments<br \/><br \/>        pos: a dict saying where all nodes go if they have been assigned<br \/>        parent: parent of this branch. - only affects it if non-directed<br \/><br \/>        '''<br \/><br \/>        if pos is None:<br \/>            pos = {root:(xcenter,vert_loc)}<br \/>        else:<br \/>            pos[root] = (xcenter, vert_loc)<br \/>        children = list(G.neighbors(root))<br \/>        if not isinstance(G, nx.DiGraph) and parent is not None:<br \/>            children.remove(parent)  <br \/>        if len(children)!=0:<br \/>            dx = width\/len(children) <br \/>            nextx = xcenter - width\/2 - dx\/2<br \/>            for child in children:<br \/>                nextx += dx<br \/>                pos = _hierarchy_pos(G,child, width = dx, vert_gap = vert_gap, <br \/>                                    vert_loc = vert_loc-vert_gap, xcenter=nextx,<br \/>                                    pos=pos, parent = root)<br \/>        return pos<br \/><br \/><br \/>    return _hierarchy_pos(G, root, width, vert_gap, vert_loc, xcenter)<\/pre>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"9397\">Fetch Indicator Data i.e. 
indicator methodology<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\" id=\"b391\">This will give an idea of the primary indicators used to measure healthcare quality<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">import pandas as pd<br \/><br \/>indicator_methodology = pd.read_excel('..\/data\/indicator-methodology.xls')<br \/>indicator_methodology.columns<br \/>indicator_methodology.head()<\/pre>\n\n\n\n<figure class=\"wp-block-image\"><img data-recalc-dims=\"1\" decoding=\"async\" src=\"https:\/\/i0.wp.com\/miro.medium.com\/proxy\/1%2AE_DKN-K13SOnG1gnpxEuWA.png?w=750&#038;ssl=1\" alt=\"png\"\/><\/figure>\n\n\n\n<pre class=\"wp-block-preformatted\">indicator_methodology['Indicator definition'][:5]<br \/><br \/>0    Number of deaths due to cancer per 100,000 fem...<br \/>1    Number of deaths due to cancer per 100,000 males.<br \/>2    Number of deaths due to ischemic heart disease...<br \/>3    Number of deaths due to cerebrovascular diseas...<br \/>4    Number of deaths due to transport accidents \\n...<br \/>Name: Indicator definition, dtype: object<br \/><br \/># get all indicators in a raw text variable<br \/>raw_indicator = ''<br \/>raw_measure = ''<br \/><br \/>for aRow in range(indicator_methodology.shape[0]):<br \/>    raw_indicator += ' ' + indicator_methodology['Indicator label'][aRow] + str('')<br \/>    raw_measure += ' ' +  str(indicator_methodology['Indicator definition'][aRow])<br \/>    <br \/>    #raw = pd.merge(indicator_methodology['Indicator label'], indicator_methodology['Indicator definition'])<br \/>    <br \/>raw_indicator[:100], raw_measure[:100]<br \/>raw_measure<br \/><br \/>' Number of deaths due to cancer per 100,000 females. Number of deaths due to cancer per 100,000 males. Number of deaths due to ischemic heart disease \\nper 100,000 population.Number of deaths due to ischemic heart disease \\nper 100,000 population.Number of deaths due to ischemic heart disease \\nper 100,000 population.Number of deaths due to ischemic heart disease \\nper 100,000 population. 
Number of deaths due to cerebrovascular diseases \\nper 100,000 population.Number of deaths due to cerebrovascular diseases \\nper 100,000 population.Number of deaths due to cerebrovascular diseases \\nper 100,000 population.Number of deaths due to cerebrovascular diseases \\nper 100,000 population. Number of deaths due to transport accidents \\nper 100,000 females.Number of deaths due to transport accidents \\nper 100,000 females.Number of deaths due to transport accidents \\nper 100,000 females.Number of deaths due to transport accidents \\nper 100,000 females. Number of deaths due to transport accidents \\nper 100,000 males.Number of deaths due to transport accidents \\nper 100,000 males.Number of deaths due to transport accidents \\nper 100,000 males.Number of deaths due to transport accidents \\nper 100,000 males. Number of deaths due to suicide per 100,000 females. Number of deaths due to suicide per 100,000 males. Deaths of children younger than 1 year per 1,000 live births. Percentage of the population age 15+ who report their health to be \u201cgood\u201d or better. Average number of years that a female can be expected to live, assuming that age-specific mortality levels remain constant. Average number of years that a male can be expected to live, assuming that age-specific mortality levels remain constant. Percentage of the population age 15+ who report eating fruit at least once per day. Percentage of the population age 15+ who report eating vegetables at least once per day. Percentage of the female population age 15+ who report that they are daily smokers. Percentage of the male population age 15+ who report that they are daily smokers. Average annual alcohol consumption in litres per capita \\n(age 15+).Average annual alcohol consumption in litres per capita \\n(age 15+).Average annual alcohol consumption in litres per capita \\n(age 15+). Percentage of adults who are obese (body mass index higher than 30 kg\/m\u00b2), self-report. 
Median wait time (in days) from specialist assessment \\n(booking date) to cataract surgery.Median wait time (in days) from specialist assessment \\n(booking date) to cataract surgery.Median wait time (in days) from specialist assessment \\n(booking date) to cataract surgery. Median wait time (in days) from specialist assessment \\n(booking date) to hip replacement.Median wait time (in days) from specialist assessment \\n(booking date) to hip replacement.Median wait time (in days) from specialist assessment \\n(booking date) to hip replacement. Median wait time (in days) from specialist assessment \\n(booking date) to knee replacement.Median wait time (in days) from specialist assessment \\n(booking date) to knee replacement.Median wait time (in days) from specialist assessment \\n(booking date) to knee replacement. Percentage of people able to get an appointment to see a doctor or a nurse on the same or next day last time they were sick or needed medical attention. Percentage of people who needed care after hours and reported difficulty getting medical care in the evenings, on weekends or on holidays without going to the hospital emergency department\/emergency room. Percentage of adults who waited for 4 weeks or more after they were advised to see or decided to see a specialist. Percentage of people who had a medical problem but did not consult\/visit a doctor because of the cost. Percentage of people with a regular doctor or place of care. Percentage of adults age 65+ who received an influenza vaccination within the past year. Number of hospital discharges for COPD of people age 15 and older per 100,000 population. Number of hospital discharges for asthma of people age 15 and older per 100,000 population. Number of hospital discharges for diabetes of people age 15 and older per 100,000 population. Percentage of adults who report that their regular doctor always or often spent enough time with them. 
Percentage of adults who report that their regular doctor always or often explains things in a way that is easy to\\xa0understand. Percentage of older adults (age 55+) who report that their regular doctor always or often gave them an opportunity to ask questions or raise concerns. Percentage of adults who report that their regular doctor always or often involved them as much as they wanted in decisions about their care and treatment. 5-year relative survival rate for breast cancer. Number of deaths due to breast cancer, per 100,000 females. 5-year relative survival rate for cervical cancer. Number of deaths due to cervical cancer, per 100,000 females. 5-year relative survival rate for colorectal cancer. Number of deaths due to colorectal cancer, \\nper 100,000 population.Number of deaths due to colorectal cancer, \\nper 100,000 population. Percentage of patients (age 45+) who die in hospital within 30 days of being admitted with a primary diagnosis of acute myocardial infarction (AMI). Percentage of patients (age 45+) who die in hospital within 30 days of being admitted with a primary diagnosis of \\nischemic stroke.Percentage of patients (age 45+) who die in hospital within 30 days of being admitted with a primary diagnosis of \\nischemic stroke. Rate of a foreign body left inside the patient\u2019s body during a procedure, per 100,000 hospital discharges (age 15+). Rate of post-operative pulmonary embolism, per 100,000 discharges for hip and knee replacement (age 15+). Rate of post-operative sepsis, per 100,000 discharges for abdominal surgery (age 15+). Percentage of vaginal deliveries with third- or fourth-degree obstetric trauma, per 100 instrument-assisted vaginal deliveries. Percentage of vaginal deliveries with third- or fourth-degree obstetric trauma, per 100 vaginal deliveries without instrument\\xa0assistance. Percentage of patients with diabetes with prescription of first-choice antihypertensive medication. 
Number per 1,000 patients age 65+ with prescriptions of more than 365 daily doses of benzodiazepines or related drugs. Number per 1,000 patients age 65+ with at least one prescription of long-acting benzodiazepines or related drugs. Total volume of antibiotics prescribed for systemic use, in defined daily doses per 1,000 population per day. Volume of second-line antibiotics as a percentage of all antibiotics prescribed. nan nan nan'<\/pre>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"183e\">Clean data<\/h2>\n\n\n\n<pre class=\"wp-block-preformatted\">text = clean_text(raw_indicator)<br \/><br \/># I wrote similar code as part of NLP assignments<br \/># remove punctuations<br \/>from nltk.tokenize import RegexpTokenizer<br \/>tokenizer = RegexpTokenizer(\"[\\w]+\")<br \/>text = tokenizer.tokenize(text)<br \/><br \/>text<br \/><br \/><br \/># get the list of stop words<br \/>from nltk.corpus import stopwords<br \/>stops = set(stopwords.words('english'))<br \/>print ('stop word count', len(stops) )<br \/><br \/># just to show a partial list of stop words<br \/>stop_l = list(stops)<br \/>stop_l[:5]<br \/><br \/><br \/># remove stopwords<br \/>text_list = [] #text.split(\" \")<br \/>for aTok in text:<br \/>    if aTok not in stops:<br \/>        text_list.append(aTok)<br \/><br \/>text_liststop word count 179<br \/><br \/><br \/><br \/><br \/><br \/>['cancer',<br \/> 'mortality',<br \/> 'f',<br \/> 'cancer',<br \/> 'mortality',<br \/> 'heart',<br \/> 'disease',<br \/> 'mortality',<br \/> 'stroke',<br \/> 'mortality',<br \/> 'transport',<br \/> 'accident',<br \/> 'mortality',<br \/> 'f',<br \/> 'transport',<br \/> 'accident',<br \/> 'mortality',<br \/> 'suicide',<br \/> 'f',<br \/> 'suicide',<br \/> 'infant',<br \/> 'mortality',<br \/> 'perceived',<br \/> 'health',<br \/> 'status',<br \/> 'life',<br \/> 'expectancy',<br \/> 'birth',<br \/> 'f',<br \/> 'life',<br \/> 'expectancy',<br \/> 'birth',<br \/> 'fruit',<br \/> 'consumption',<br \/> 'adults',<br \/> 'vegetable',<br 
\/> 'consumption',<br \/> 'adults',<br \/> 'smoking',<br \/> 'adults',<br \/> 'f',<br \/> 'smoking',<br \/> 'adults',<br \/> 'alcohol',<br \/> 'consumption',<br \/> 'adults',<br \/> 'obesity',<br \/> 'reported',<br \/> 'adults',<br \/> 'wait',<br \/> 'time',<br \/> 'cataract',<br \/> 'surgery',<br \/> 'wait',<br \/> 'time',<br \/> 'hip',<br \/> 'replacement',<br \/> 'wait',<br \/> 'time',<br \/> 'knee',<br \/> 'replacement',<br \/> 'next',<br \/> 'day',<br \/> 'appt',<br \/> 'poor',<br \/> 'weekend',<br \/> 'evening',<br \/> 'care',<br \/> 'wait',<br \/> 'time',<br \/> 'specialist',<br \/> 'inability',<br \/> 'pay',<br \/> 'medical',<br \/> 'bills',<br \/> 'regular',<br \/> 'doctor',<br \/> 'influenza',<br \/> 'vaccination',<br \/> 'avoidable',<br \/> 'admissions',<br \/> 'copd',<br \/> 'avoidable',<br \/> 'admissions',<br \/> 'asthma',<br \/> 'avoidable',<br \/> 'admissions',<br \/> 'diabetes',<br \/> 'time',<br \/> 'spent',<br \/> 'doctor',<br \/> 'easy',<br \/> 'understand',<br \/> 'doctor',<br \/> 'know',<br \/> 'important',<br \/> 'medical',<br \/> 'history',<br \/> 'involvement',<br \/> 'decisions',<br \/> 'breast',<br \/> 'cancer',<br \/> 'survival',<br \/> 'breast',<br \/> 'cancer',<br \/> 'mortality',<br \/> 'cervical',<br \/> 'cancer',<br \/> 'survival',<br \/> 'cervical',<br \/> 'cancer',<br \/> 'mortality',<br \/> 'colorectal',<br \/> 'cancer',<br \/> 'survival',<br \/> 'colorectal',<br \/> 'cancer',<br \/> 'mortality',<br \/> 'day',<br \/> 'hospital',<br \/> 'fatality',<br \/> 'ami',<br \/> 'day',<br \/> 'hospital',<br \/> 'fatality',<br \/> 'ischemic',<br \/> 'stroke',<br \/> 'foreign',<br \/> 'body',<br \/> 'left',<br \/> 'post',<br \/> 'op',<br \/> 'pe',<br \/> 'hip',<br \/> 'knee',<br \/> 'post',<br \/> 'op',<br \/> 'sepsis',<br \/> 'abdominal',<br \/> 'ob',<br \/> 'trauma',<br \/> 'instrument',<br \/> 'ob',<br \/> 'trauma',<br \/> 'instrument',<br \/> 'diabetes',<br \/> 'high',<br \/> 'blood',<br \/> 'pressure',<br \/> 'medication',<br \/> 
'benzodiazepines',<br \/> 'chronic',<br \/> 'use',<br \/> 'benzodiazepines',<br \/> 'long',<br \/> 'acting',<br \/> 'antibiotics',<br \/> 'total',<br \/> 'volume',<br \/> 'systemic',<br \/> 'use',<br \/> 'antibiotics',<br \/> 'proportion',<br \/> 'second',<br \/> 'line',<br \/> 'notes',<br \/> 'oecd',<br \/> 'organisation',<br \/> 'economic',<br \/> 'co',<br \/> 'operation',<br \/> 'development',<br \/> 'province',<br \/> 'determined',<br \/> 'patient',<br \/> 'residence',<br \/> 'indicators',<br \/> 'except',<br \/> 'patient',<br \/> 'safety',<br \/> 'dimension',<br \/> 'calculated',<br \/> 'facility',<br \/> 'province']# find all unigrams<br \/><br \/><br \/>unigrams = ngrams(text_list, 1) # resulting object is an iterator<br \/># bigrams = ngrams(text_list, 2) # <br \/>unigrams = list(ngrams(text_list, 1)) # resulting object is an iterator<br \/><br \/>#for uni in unigrams: #<br \/>    #print(uni); #<br \/>    <br \/>freq = Counter(unigrams)<br \/>#print(freq) #    <br \/><br \/>topN = freq.most_common()[1:20] #top frequent 20 words<br \/>#print(topN) #<br \/>wordscount = {w[0]:f for w, f in topN}<br \/>sorted_wordscount = sorted(wordscount.items(), key=operator.itemgetter(1),reverse=True)<br \/>#print(sorted_wordscount) #<br \/><br \/><br \/>## use pytag package<br \/>create_tag_image(make_tags(sorted_wordscount[:],maxsize=40), 'filename.png', size=(250,200), background=(0, 0, 0, 255), layout=LAYOUT_MIX, fontname='Molengo', rectangular=True)<br \/>Image(\"filename.png\")<\/pre>\n\n\n\n<figure class=\"wp-block-image\"><img data-recalc-dims=\"1\" decoding=\"async\" src=\"https:\/\/i0.wp.com\/miro.medium.com\/proxy\/1%2AHAGdzdJx2HlFMnZYkdC-sg.png?w=750&#038;ssl=1\" alt=\"png\"\/><\/figure>\n\n\n\n<pre class=\"wp-block-preformatted\">text_again = ''<br \/>for aWord in text_list:<br \/>    if len(aWord) &gt; 2:<br \/>        text_again += ' ' + aWord<br \/><br \/>text_again' cancer mortality cancer mortality heart disease mortality stroke mortality transport 
accident mortality transport accident mortality suicide suicide infant mortality perceived health status life expectancy birth life expectancy birth fruit consumption adults vegetable consumption adults smoking adults smoking adults alcohol consumption adults obesity reported adults wait time cataract surgery wait time hip replacement wait time knee replacement next day appt poor weekend evening care wait time specialist inability pay medical bills regular doctor influenza vaccination avoidable admissions copd avoidable admissions asthma avoidable admissions diabetes time spent doctor easy understand doctor know important medical history involvement decisions breast cancer survival breast cancer mortality cervical cancer survival cervical cancer mortality colorectal cancer survival colorectal cancer mortality day hospital fatality ami day hospital fatality ischemic stroke foreign body left post hip knee post sepsis abdominal trauma instrument trauma instrument diabetes high blood pressure medication benzodiazepines chronic use benzodiazepines long acting antibiotics total volume systemic use antibiotics proportion second line notes oecd organisation economic operation development province determined patient residence indicators except patient safety dimension calculated facility province'## using wordcloud package<br \/>wordcloud = WordCloud(max_font_size=40).generate(text_again)<br \/>plt.figure()<br \/>plt.imshow(wordcloud)<br \/>plt.axis(\"off\")<br \/>plt.show()<br \/><br \/>## use custom scoring<br \/>wordscount = {w[0]:f for w, f in topN}   <br \/><br \/>wordcloud = WordCloud(max_font_size=40)<br \/>wordcloud.fit_words(wordscount)<br \/>plt.figure()<br \/>plt.imshow(wordcloud)<br \/>plt.axis(\"off\")<br \/>plt.show()<\/pre>\n\n\n\n<figure class=\"wp-block-image\"><img data-recalc-dims=\"1\" decoding=\"async\" src=\"https:\/\/i0.wp.com\/miro.medium.com\/proxy\/1%2ApZY_pf5CNXCsxx9w2JhNSg.png?w=750&#038;ssl=1\" alt=\"png\"\/><\/figure>\n\n\n\n<figure 
class=\"wp-block-image\"><img data-recalc-dims=\"1\" decoding=\"async\" src=\"https:\/\/i0.wp.com\/miro.medium.com\/proxy\/1%2AgNQ6aV8Ibh8ZMM5sU32-rQ.png?w=750&#038;ssl=1\" alt=\"png\"\/><\/figure>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"cec1\">Word Association<\/h2>\n\n\n\n<pre class=\"wp-block-preformatted\"># split text into sentences<br \/># each sentence is a \"market basket\"<br \/>tokenizer = nltk.data.load('tokenizers\/punkt\/english.pickle')<br \/>#tokenizer = nltk.data.load('tokenizers\/punkt\/english.pickle')<br \/>#review_text = BeautifulSoup(raw_indicator)<br \/>review_text = BeautifulSoup(raw_indicator)<br \/>for script in review_text([\"script\", \"style\"]):<br \/>        script.extract()    # rip it out<br \/># get text<br \/>review_text = review_text.get_text()<br \/>sentences = tokenizer.tokenize(review_text)<br \/><br \/><br \/>##naive implementation<br \/>word_association = Counter()<br \/>for sent in sentences:<br \/>    bigrams = Counter(ngrams([w for w in sent.lower().split(\" \") if not w in stopwords.words('english')], 2))<br \/>    word_association.update(bigrams)<br \/>    <br \/>topN = word_association.most_common()[1:20]  <br \/><br \/>G = nx.Graph()<br \/>for edge in topN:<br \/>    G.add_edge(edge[0][0], edge[0][1], weight=edge[1])<br \/>pos=nx.circular_layout(G)<br \/><br \/>plt.figure(3,figsize=(6,6)) <br \/>nx.draw(G, pos, with_labels = True, font_size=10, edge_color='blue', node_color='white', font_weight='bold')<br \/>plt.show()<br \/><br \/># http:\/\/stackoverflow.com\/questions\/13429094\/implementing-a-word-tree-using-nested-dictionaries-in-python<br \/>edges = []<br \/><br \/><br \/><br \/>word_sents_arrays = []<br \/>for sent in sentences[1:5]:<br \/>    word_sents_arrays.append(clean_text(sent).split())<br \/>print(sentences[1:5])<br \/><br \/><br \/>mnn, edges= doall(word_sents_arrays,{})  <br \/><br \/><br \/><br \/><br \/>G=nx.Graph()<br \/>G.add_edges_from(edges)<br \/>G1 = nx.Graph(nx.minimum_spanning_edges(G))<br 
\/>pos = hierarchy_pos(G1,'root')  <br \/><br \/>plt.figure(3,figsize=(10,10)) <br \/>nx.draw(G1, pos=pos, with_labels=True, edge_color='blue', node_size=30, font_size=10, node_color='white')<br \/>plt.show()<\/pre>\n\n\n\n<figure class=\"wp-block-image\"><img data-recalc-dims=\"1\" decoding=\"async\" src=\"https:\/\/i0.wp.com\/miro.medium.com\/proxy\/1%2AiUJb2SOAlSsi-8nNbYeKLA.png?w=750&#038;ssl=1\" alt=\"png\"\/><\/figure>\n\n\n\n<pre class=\"wp-block-preformatted\">['Province is determined by patient residence for all indicators except those in the patient safety dimension, which are calculated by facility province.']<\/pre>\n\n\n\n<figure class=\"wp-block-image\"><img data-recalc-dims=\"1\" decoding=\"async\" src=\"https:\/\/i0.wp.com\/miro.medium.com\/proxy\/1%2A3ZLjsUXiJuBSgLhOzafhsA.png?w=750&#038;ssl=1\" alt=\"png\"\/><\/figure>\n\n\n\n<h1 class=\"wp-block-heading\" id=\"22b4\">Understanding what is measured<\/h1>\n\n\n\n<pre class=\"wp-block-preformatted\">text = clean_text(raw_measure)<br \/><br \/># I wrote similar code as part of NLP assignments<br \/># remove punctuations<br \/>from nltk.tokenize import RegexpTokenizer<br \/>tokenizer = RegexpTokenizer(\"[\\w]+\")<br \/>text = tokenizer.tokenize(text)<br \/><br \/>text<br \/><br \/><br \/># get the list of stop words<br \/>from nltk.corpus import stopwords<br \/>stops = set(stopwords.words('english'))<br \/>print ('stop word count', len(stops) )<br \/><br \/># just to show a partial list of stop words<br \/>stop_l = list(stops)<br \/>stop_l[:5]<br \/><br \/><br \/># remove stopwords<br \/>text_list = [] #text.split(\" \")<br \/>for aTok in text:<br \/>    if aTok not in stops:<br \/>        text_list.append(aTok)<br \/><br \/>text_list<br \/><br \/>stop word count 179<br \/><br \/><br \/><br \/><br \/><br \/>['number',<br \/> 'deaths',<br \/> 'due',<br \/> 'cancer',<br \/> 'per',<br \/> 'females',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'cancer',<br \/> 'per',<br \/> 'males',<br \/> 'number',<br \/> 
'deaths',<br \/> 'due',<br \/> 'ischemic',<br \/> 'heart',<br \/> 'disease',<br \/> 'per',<br \/> 'population',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'ischemic',<br \/> 'heart',<br \/> 'disease',<br \/> 'per',<br \/> 'population',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'ischemic',<br \/> 'heart',<br \/> 'disease',<br \/> 'per',<br \/> 'population',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'ischemic',<br \/> 'heart',<br \/> 'disease',<br \/> 'per',<br \/> 'population',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'cerebrovascular',<br \/> 'diseases',<br \/> 'per',<br \/> 'population',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'cerebrovascular',<br \/> 'diseases',<br \/> 'per',<br \/> 'population',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'cerebrovascular',<br \/> 'diseases',<br \/> 'per',<br \/> 'population',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'cerebrovascular',<br \/> 'diseases',<br \/> 'per',<br \/> 'population',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'transport',<br \/> 'accidents',<br \/> 'per',<br \/> 'females',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'transport',<br \/> 'accidents',<br \/> 'per',<br \/> 'females',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'transport',<br \/> 'accidents',<br \/> 'per',<br \/> 'females',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'transport',<br \/> 'accidents',<br \/> 'per',<br \/> 'females',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'transport',<br \/> 'accidents',<br \/> 'per',<br \/> 'males',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'transport',<br \/> 'accidents',<br \/> 'per',<br \/> 'males',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'transport',<br \/> 'accidents',<br \/> 'per',<br \/> 'males',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'transport',<br \/> 'accidents',<br \/> 'per',<br \/> 'males',<br \/> 
'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'suicide',<br \/> 'per',<br \/> 'females',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'suicide',<br \/> 'per',<br \/> 'males',<br \/> 'deaths',<br \/> 'children',<br \/> 'younger',<br \/> 'year',<br \/> 'per',<br \/> 'live',<br \/> 'births',<br \/> 'percentage',<br \/> 'population',<br \/> 'age',<br \/> 'report',<br \/> 'health',<br \/> 'good',<br \/> 'better',<br \/> 'average',<br \/> 'number',<br \/> 'years',<br \/> 'female',<br \/> 'expected',<br \/> 'live',<br \/> 'assuming',<br \/> 'age',<br \/> 'specific',<br \/> 'mortality',<br \/> 'levels',<br \/> 'remain',<br \/> 'constant',<br \/> 'average',<br \/> 'number',<br \/> 'years',<br \/> 'male',<br \/> 'expected',<br \/> 'live',<br \/> 'assuming',<br \/> 'age',<br \/> 'specific',<br \/> 'mortality',<br \/> 'levels',<br \/> 'remain',<br \/> 'constant',<br \/> 'percentage',<br \/> 'population',<br \/> 'age',<br \/> 'report',<br \/> 'eating',<br \/> 'fruit',<br \/> 'least',<br \/> 'per',<br \/> 'day',<br \/> 'percentage',<br \/> 'population',<br \/> 'age',<br \/> 'report',<br \/> 'eating',<br \/> 'vegetables',<br \/> 'least',<br \/> 'per',<br \/> 'day',<br \/> 'percentage',<br \/> 'female',<br \/> 'population',<br \/> 'age',<br \/> 'report',<br \/> 'daily',<br \/> 'smokers',<br \/> 'percentage',<br \/> 'male',<br \/> 'population',<br \/> 'age',<br \/> 'report',<br \/> 'daily',<br \/> 'smokers',<br \/> 'average',<br \/> 'annual',<br \/> 'alcohol',<br \/> 'consumption',<br \/> 'litres',<br \/> 'per',<br \/> 'capita',<br \/> 'age',<br \/> 'average',<br \/> 'annual',<br \/> 'alcohol',<br \/> 'consumption',<br \/> 'litres',<br \/> 'per',<br \/> 'capita',<br \/> 'age',<br \/> 'average',<br \/> 'annual',<br \/> 'alcohol',<br \/> 'consumption',<br \/> 'litres',<br \/> 'per',<br \/> 'capita',<br \/> 'age',<br \/> 'percentage',<br \/> 'adults',<br \/> 'obese',<br \/> 'body',<br \/> 'mass',<br \/> 'index',<br \/> 'higher',<br \/> 'kg',<br \/> 'self',<br \/> 
'report',<br \/> 'median',<br \/> 'wait',<br \/> 'time',<br \/> 'days',<br \/> 'specialist',<br \/> 'assessment',<br \/> 'booking',<br \/> 'date',<br \/> 'cataract',<br \/> 'surgery',<br \/> 'median',<br \/> 'wait',<br \/> 'time',<br \/> 'days',<br \/> 'specialist',<br \/> 'assessment',<br \/> 'booking',<br \/> 'date',<br \/> 'cataract',<br \/> 'surgery',<br \/> 'median',<br \/> 'wait',<br \/> 'time',<br \/> 'days',<br \/> 'specialist',<br \/> 'assessment',<br \/> 'booking',<br \/> 'date',<br \/> 'cataract',<br \/> 'surgery',<br \/> 'median',<br \/> 'wait',<br \/> 'time',<br \/> 'days',<br \/> 'specialist',<br \/> 'assessment',<br \/> 'booking',<br \/> 'date',<br \/> 'hip',<br \/> 'replacement',<br \/> 'median',<br \/> 'wait',<br \/> 'time',<br \/> 'days',<br \/> 'specialist',<br \/> 'assessment',<br \/> 'booking',<br \/> 'date',<br \/> 'hip',<br \/> 'replacement',<br \/> 'median',<br \/> 'wait',<br \/> 'time',<br \/> 'days',<br \/> 'specialist',<br \/> 'assessment',<br \/> 'booking',<br \/> 'date',<br \/> 'hip',<br \/> 'replacement',<br \/> 'median',<br \/> 'wait',<br \/> 'time',<br \/> 'days',<br \/> 'specialist',<br \/> 'assessment',<br \/> 'booking',<br \/> 'date',<br \/> 'knee',<br \/> 'replacement',<br \/> 'median',<br \/> 'wait',<br \/> 'time',<br \/> 'days',<br \/> 'specialist',<br \/> 'assessment',<br \/> 'booking',<br \/> 'date',<br \/> 'knee',<br \/> 'replacement',<br \/> 'median',<br \/> 'wait',<br \/> 'time',<br \/> 'days',<br \/> 'specialist',<br \/> 'assessment',<br \/> 'booking',<br \/> 'date',<br \/> 'knee',<br \/> 'replacement',<br \/> 'percentage',<br \/> 'people',<br \/> 'able',<br \/> 'get',<br \/> 'appointment',<br \/> 'see',<br \/> 'doctor',<br \/> 'nurse',<br \/> 'next',<br \/> 'day',<br \/> 'last',<br \/> 'time',<br \/> 'sick',<br \/> 'needed',<br \/> 'medical',<br \/> 'attention',<br \/> 'percentage',<br \/> 'people',<br \/> 'needed',<br \/> 'care',<br \/> 'hours',<br \/> 'reported',<br \/> 'difficulty',<br \/> 'getting',<br \/> 
'medical',<br \/> 'care',<br \/> 'evenings',<br \/> 'weekends',<br \/> 'holidays',<br \/> 'without',<br \/> 'going',<br \/> 'hospital',<br \/> 'emergency',<br \/> 'department',<br \/> 'emergency',<br \/> 'room',<br \/> 'percentage',<br \/> 'adults',<br \/> 'waited',<br \/> 'weeks',<br \/> 'advised',<br \/> 'see',<br \/> 'decided',<br \/> 'see',<br \/> 'specialist',<br \/> 'percentage',<br \/> 'people',<br \/> 'medical',<br \/> 'problem',<br \/> 'consult',<br \/> 'visit',<br \/> 'doctor',<br \/> 'cost',<br \/> 'percentage',<br \/> 'people',<br \/> 'regular',<br \/> 'doctor',<br \/> 'place',<br \/> 'care',<br \/> 'percentage',<br \/> 'adults',<br \/> 'age',<br \/> 'received',<br \/> 'influenza',<br \/> 'vaccination',<br \/> 'within',<br \/> 'past',<br \/> 'year',<br \/> 'number',<br \/> 'hospital',<br \/> 'discharges',<br \/> 'copd',<br \/> 'people',<br \/> 'age',<br \/> 'older',<br \/> 'per',<br \/> 'population',<br \/> 'number',<br \/> 'hospital',<br \/> 'discharges',<br \/> 'asthma',<br \/> 'people',<br \/> 'age',<br \/> 'older',<br \/> 'per',<br \/> 'population',<br \/> 'number',<br \/> 'hospital',<br \/> 'discharges',<br \/> 'diabetes',<br \/> 'people',<br \/> 'age',<br \/> 'older',<br \/> 'per',<br \/> 'population',<br \/> 'percentage',<br \/> 'adults',<br \/> 'report',<br \/> 'regular',<br \/> 'doctor',<br \/> 'always',<br \/> 'often',<br \/> 'spent',<br \/> 'enough',<br \/> 'time',<br \/> 'percentage',<br \/> 'adults',<br \/> 'report',<br \/> 'regular',<br \/> 'doctor',<br \/> 'always',<br \/> 'often',<br \/> 'explains',<br \/> 'things',<br \/> 'way',<br \/> 'easy',<br \/> 'understand',<br \/> 'percentage',<br \/> 'older',<br \/> 'adults',<br \/> 'age',<br \/> 'report',<br \/> 'regular',<br \/> 'doctor',<br \/> 'always',<br \/> 'often',<br \/> 'gave',<br \/> 'opportunity',<br \/> 'ask',<br \/> 'questions',<br \/> 'raise',<br \/> 'concerns',<br \/> 'percentage',<br \/> 'adults',<br \/> 'report',<br \/> 'regular',<br \/> 'doctor',<br \/> 'always',<br \/> 
'often',<br \/> 'involved',<br \/> 'much',<br \/> 'wanted',<br \/> 'decisions',<br \/> 'care',<br \/> 'treatment',<br \/> 'year',<br \/> 'relative',<br \/> 'survival',<br \/> 'rate',<br \/> 'breast',<br \/> 'cancer',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'breast',<br \/> 'cancer',<br \/> 'per',<br \/> 'females',<br \/> 'year',<br \/> 'relative',<br \/> 'survival',<br \/> 'rate',<br \/> 'cervical',<br \/> 'cancer',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'cervical',<br \/> 'cancer',<br \/> 'per',<br \/> 'females',<br \/> 'year',<br \/> 'relative',<br \/> 'survival',<br \/> 'rate',<br \/> 'colorectal',<br \/> 'cancer',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'colorectal',<br \/> 'cancer',<br \/> 'per',<br \/> 'population',<br \/> 'number',<br \/> 'deaths',<br \/> 'due',<br \/> 'colorectal',<br \/> 'cancer',<br \/> 'per',<br \/> 'population',<br \/> 'percentage',<br \/> 'patients',<br \/> 'age',<br \/> 'die',<br \/> 'hospital',<br \/> 'within',<br \/> 'days',<br \/> 'admitted',<br \/> 'primary',<br \/> 'diagnosis',<br \/> 'acute',<br \/> 'myocardial',<br \/> 'infarction',<br \/> 'ami',<br \/> 'percentage',<br \/> 'patients',<br \/> 'age',<br \/> 'die',<br \/> 'hospital',<br \/> 'within',<br \/> 'days',<br \/> 'admitted',<br \/> 'primary',<br \/> 'diagnosis',<br \/> 'ischemic',<br \/> 'stroke',<br \/> 'percentage',<br \/> 'patients',<br \/> 'age',<br \/> 'die',<br \/> 'hospital',<br \/> 'within',<br \/> 'days',<br \/> 'admitted',<br \/> 'primary',<br \/> 'diagnosis',<br \/> 'ischemic',<br \/> 'stroke',<br \/> 'rate',<br \/> 'foreign',<br \/> 'body',<br \/> 'left',<br \/> 'inside',<br \/> 'patient',<br \/> 'body',<br \/> 'procedure',<br \/> 'per',<br \/> 'hospital',<br \/> 'discharges',<br \/> 'age',<br \/> 'rate',<br \/> 'post',<br \/> 'operative',<br \/> 'pulmonary',<br \/> 'embolism',<br \/> 'per',<br \/> 'discharges',<br \/> 'hip',<br \/> 'knee',<br \/> 'replacement',<br \/> 'age',<br \/> 'rate',<br \/> 'post',<br \/> 
'operative',<br \/> 'sepsis',<br \/> 'per',<br \/> 'discharges',<br \/> 'abdominal',<br \/> 'surgery',<br \/> 'age',<br \/> 'percentage',<br \/> 'vaginal',<br \/> 'deliveries',<br \/> 'third',<br \/> 'fourth',<br \/> 'degree',<br \/> 'obstetric',<br \/> 'trauma',<br \/> 'per',<br \/> 'instrument',<br \/> 'assisted',<br \/> 'vaginal',<br \/> 'deliveries',<br \/> 'percentage',<br \/> 'vaginal',<br \/> 'deliveries',<br \/> 'third',<br \/> 'fourth',<br \/> 'degree',<br \/> 'obstetric',<br \/> 'trauma',<br \/> 'per',<br \/> 'vaginal',<br \/> 'deliveries',<br \/> 'without',<br \/> 'instrument',<br \/> 'assistance',<br \/> 'percentage',<br \/> 'patients',<br \/> 'diabetes',<br \/> 'prescription',<br \/> 'first',<br \/> 'choice',<br \/> 'antihypertensive',<br \/> 'medication',<br \/> 'number',<br \/> 'per',<br \/> 'patients',<br \/> 'age',<br \/> 'prescriptions',<br \/> 'daily',<br \/> 'doses',<br \/> 'benzodiazepines',<br \/> 'related',<br \/> 'drugs',<br \/> 'number',<br \/> 'per',<br \/> 'patients',<br \/> 'age',<br \/> 'least',<br \/> 'one',<br \/> 'prescription',<br \/> 'long',<br \/> 'acting',<br \/> 'benzodiazepines',<br \/> 'related',<br \/> 'drugs',<br \/> 'total',<br \/> 'volume',<br \/> 'antibiotics',<br \/> 'prescribed',<br \/> 'systemic',<br \/> 'use',<br \/> 'defined',<br \/> 'daily',<br \/> 'doses',<br \/> 'per',<br \/> 'population',<br \/> 'per',<br \/> 'day',<br \/> 'volume',<br \/> 'second',<br \/> 'line',<br \/> 'antibiotics',<br \/> 'percentage',<br \/> 'antibiotics',<br \/> 'prescribed',<br \/> 'nan',<br \/> 'nan',<br \/> 'nan']# text = clean_text(raw_measure)<br \/># text_list = text.split(\" \")<br \/><br \/><br \/># print(text_list)  #<br \/>unigrams = ngrams(text_list, 1) # resulting object is an iterator<br \/># bigrams = ngrams(text_list, 2) # <br \/>unigrams = list(ngrams(text_list, 1)) # resulting object is an iterator<br \/><br \/>#for uni in unigrams: #<br \/>    #print(uni); #<br \/>    <br \/>freq = Counter(unigrams)<br \/>#print(freq) #    
<br \/><br \/>topN = freq.most_common()[1:20] #top frequent 20 words<br \/>#print(topN) #<br \/>wordscount = {w[0]:f for w, f in topN}<br \/>sorted_wordscount = sorted(wordscount.items(), key=operator.itemgetter(1),reverse=True)<br \/>#print(sorted_wordscount) #<br \/><br \/><br \/>## use pytag package<br \/>create_tag_image(make_tags(sorted_wordscount[:],maxsize=40), 'filename.png', size=(250,200), background=(0, 0, 0, 255), layout=LAYOUT_MIX, fontname='Molengo', rectangular=True)<br \/>Image(\"filename.png\")<\/pre>\n\n\n\n<figure class=\"wp-block-image\"><img data-recalc-dims=\"1\" decoding=\"async\" src=\"https:\/\/i0.wp.com\/miro.medium.com\/proxy\/1%2AKk1qrm4S37hn7atAOOXdHA.png?w=750&#038;ssl=1\" alt=\"png\"\/><\/figure>\n\n\n\n<pre class=\"wp-block-preformatted\">text_again = ''<br \/>for aWord in text_list:<br \/>    if len(aWord) &gt; 2:<br \/>        text_again += ' ' + aWord<br \/><br \/>text_again<br \/><br \/>## using wordcloud package<br \/>wordcloud = WordCloud(max_font_size=40).generate(text_again)<br \/>plt.figure()<br \/>plt.imshow(wordcloud)<br \/>plt.axis(\"off\")<br \/>plt.show()<br \/><br \/>## use custom scoring<br \/>wordscount = {w[0]:f for w, f in topN}   <br \/><br \/>wordcloud = WordCloud(max_font_size=40)<br \/>wordcloud.fit_words(wordscount)<br \/>plt.figure()<br \/>plt.imshow(wordcloud)<br \/>plt.axis(\"off\")<br \/>plt.show()<\/pre>\n\n\n\n<figure class=\"wp-block-image\"><img data-recalc-dims=\"1\" decoding=\"async\" src=\"https:\/\/i0.wp.com\/miro.medium.com\/proxy\/1%2AQvN9q3k2-xYvIN2VKdbbdg.png?w=750&#038;ssl=1\" alt=\"png\"\/><\/figure>\n\n\n\n<figure class=\"wp-block-image\"><img data-recalc-dims=\"1\" decoding=\"async\" src=\"https:\/\/i0.wp.com\/miro.medium.com\/proxy\/1%2A1sJmOr3wMWTa2ZAcQCBRuw.png?w=750&#038;ssl=1\" alt=\"png\"\/><\/figure>\n\n\n\n<pre class=\"wp-block-preformatted\"># split text into sentences<br \/># each sentence is a \"market basket\"<br \/>tokenizer = 
nltk.data.load('tokenizers\/punkt\/english.pickle')<br \/>#tokenizer = nltk.data.load('tokenizers\/punkt\/english.pickle')<br \/>review_text = BeautifulSoup(raw_measure)<br \/>for script in review_text([\"script\", \"style\"]):<br \/>        script.extract()    # rip it out<br \/># get text<br \/>review_text = review_text.get_text()<br \/>sentences = tokenizer.tokenize(review_text)<br \/><br \/><br \/>##naive implementation<br \/>word_association = Counter()<br \/>for sent in sentences:<br \/>    bigrams = Counter(ngrams([w for w in sent.lower().split(\" \") if not w in stopwords.words('english')], 2))<br \/>    word_association.update(bigrams)<br \/>    <br \/>topN = word_association.most_common()[1:20]  <br \/><br \/>G = nx.Graph()<br \/>for edge in topN:<br \/>    G.add_edge(edge[0][0], edge[0][1], weight=edge[1])<br \/>pos=nx.circular_layout(G)<br \/><br \/>plt.figure(3,figsize=(6,6)) <br \/>nx.draw(G, pos, with_labels = True, font_size=10, edge_color='blue', node_color='white', font_weight='bold')<br \/>plt.show()<br \/><br \/># http:\/\/stackoverflow.com\/questions\/13429094\/implementing-a-word-tree-using-nested-dictionaries-in-python<br \/>edges = []<br \/><br \/>word_sents_arrays = []<br \/>for sent in sentences[1:5]:<br \/>    word_sents_arrays.append(clean_text(sent).split())<br \/>print(sentences[1:5])<br \/><br \/><br \/>mnn, edges= doall(word_sents_arrays,{})  <br \/><br \/><br \/><br \/><br \/>G=nx.Graph()<br \/>G.add_edges_from(edges)<br \/>G1 = nx.Graph(nx.minimum_spanning_edges(G))<br \/>pos = hierarchy_pos(G1,'root')  <br \/><br \/>plt.figure(3,figsize=(10,10)) <br \/>nx.draw(G1, pos=pos, with_labels=True, edge_color='blue', node_size=30, font_size=10, node_color='white')<br \/>plt.show()<\/pre>\n\n\n\n<figure class=\"wp-block-image\"><img data-recalc-dims=\"1\" decoding=\"async\" src=\"https:\/\/i0.wp.com\/miro.medium.com\/proxy\/1%2Acz3vkr6yrYhjfnHxSn51Xg.png?w=750&#038;ssl=1\" alt=\"png\"\/><\/figure>\n\n\n\n<pre 
class=\"wp-block-preformatted\">['Number of deaths due to cancer per 100,000 males.', 'Number of deaths due to ischemic heart disease \\nper 100,000 population.Number of deaths due to ischemic heart disease \\nper 100,000 population.Number of deaths due to ischemic heart disease \\nper 100,000 population.Number of deaths due to ischemic heart disease \\nper 100,000 population.', 'Number of deaths due to cerebrovascular diseases \\nper 100,000 population.Number of deaths due to cerebrovascular diseases \\nper 100,000 population.Number of deaths due to cerebrovascular diseases \\nper 100,000 population.Number of deaths due to cerebrovascular diseases \\nper 100,000 population.', 'Number of deaths due to transport accidents \\nper 100,000 females.Number of deaths due to transport accidents \\nper 100,000 females.Number of deaths due to transport accidents \\nper 100,000 females.Number of deaths due to transport accidents \\nper 100,000 females.']<\/pre>\n\n\n\n<figure class=\"wp-block-image\"><img data-recalc-dims=\"1\" decoding=\"async\" src=\"https:\/\/i0.wp.com\/miro.medium.com\/proxy\/1%2AWoiP9wkctprM6rhR7dPD4w.png?w=750&#038;ssl=1\" alt=\"png\"\/><\/figure>\n\n\n\n<h1 class=\"wp-block-heading\" id=\"25b4\">reference:<\/h1>\n\n\n\n<ol class=\"wp-block-list\"><li>pd.merge reference:&nbsp;<a href=\"https:\/\/www.shanelynn.ie\/merge-join-dataframes-python-pandas-index-1\/\">https:\/\/www.shanelynn.ie\/merge-join-dataframes-python-pandas-index-1\/<\/a>&nbsp;: though I could just read and string concatenate in a loop<\/li><\/ol>\n","protected":false},"excerpt":{"rendered":"<p>Justetc Social Services (non-profit)Jan 31&nbsp;\u00b7&nbsp;16&nbsp;min read This code works with the data on the excel file: indicator-methodology.xls Purpose: Find out prominant indicators \u2014 this might also mean the critical aspect of health Find out the measurements that are used to find the quality Code Reference: This code heavily makes use of the code provided on &hellip; <\/p>\n<p><a 
class=\"more-link btn\" href=\"http:\/\/bangla.sitestree.com\/?p=19932\">Continue reading<\/a><\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_feature_clip_id":0,"_jetpack_memberships_contains_paid_content":false,"footnotes":"","jetpack_post_was_ever_published":false},"categories":[1903,1894,1919,182],"tags":[],"class_list":["post-19932","post","type-post","status-publish","format-standard","hentry","category-data-visualization","category-health-system-performance","category-visualize-health-system-performance","category---blog","item-wrap"],"jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"jetpack-related-posts":[{"id":14916,"url":"http:\/\/bangla.sitestree.com\/?p=14916","url_meta":{"origin":19932,"position":0},"title":"How different Canadian Regions\/Provinces compare for Health Status Indicators?","author":"Sayed","date":"July 15, 2019","format":false,"excerpt":"As my code found using a dataset (on Health System Performance) from CIHI, Canada. 
Available Data Averaged for 2013 to 2017 Health Status Indicators: ['Cancer Mortality (F)', 'Cancer Mortality (M)', 'Heart Disease Mortality', 'Infant Mortality', 'Life Expectancy at Birth (F)', 'Life Expectancy at Birth (M)', 'Perceived Health Status', 'Stroke Mortality',\u2026","rel":"","context":"In &quot;Health Status&quot;","block_context":{"text":"Health Status","link":"http:\/\/bangla.sitestree.com\/?cat=1898"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/bangla.salearningschool.com\/wp-content\/uploads\/2019\/07\/Screen-Shot-2019-07-15-at-9.53.26-PM-1-300x271.png?resize=350%2C200","width":350,"height":200},"classes":[]},{"id":19914,"url":"http:\/\/bangla.sitestree.com\/?p=19914","url_meta":{"origin":19932,"position":1},"title":"IPywidgets, interactive","author":"Sayed","date":"February 7, 2021","format":false,"excerpt":"Justetc Social Services (non-profit)\u00b7Jan 31 from __future__ import print_functionfrom ipywidgets import interact, interactive, fixed, interact_manualimport ipywidgets as widgetsdef f(x): return xinteract(f, x=10);interactive(children=(IntSlider(value=10, description='x', max=30, min=-10), Output()), _dom_classes=('widget-\u2026interact(f, x=True);interactive(children=(Checkbox(value=True, description='x'), Output()), _dom_classes=('widget-interact',))interact(f, x='Hi there!');interactive(children=(Text(value='Hi there!', description='x'), Output()), _dom_classes=('widget-interact',))@interact(x=True, y=1.0)def g(x, y): return (x, y)interactive(children=(Checkbox(value=True, description='x'), FloatSlider(value=1.0, description='y', max=3.0, \u2026interact(f, x=['apples','oranges']);interactive(children=(Dropdown(description='x', options=('apples',\u2026","rel":"","context":"In &quot;Data Visualization&quot;","block_context":{"text":"Data 
Visualization","link":"http:\/\/bangla.sitestree.com\/?cat=1903"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":15096,"url":"http:\/\/bangla.sitestree.com\/?p=15096","url_meta":{"origin":19932,"position":2},"title":"Health Status Indicators: Health System Performance","author":"Sayed","date":"July 23, 2019","format":false,"excerpt":"Health Status Indicators: Health System Performance Health Status Measurements Indicators: Dataset from CIHI, Canada Cancer Mortality (F)', 'Cancer Mortality (M)', 'Heart Disease Mortality', 'Infant Mortality', 'Life Expectancy at Birth (F)', 'Life Expectancy at Birth (M)', 'Perceived Health Status', 'Stroke Mortality', 'Suicide (F)', 'Suicide (M)', 'Transport Accident Mortality (F)', 'Transport Accident\u2026","rel":"","context":"In &quot;Health Status&quot;","block_context":{"text":"Health Status","link":"http:\/\/bangla.sitestree.com\/?cat=1898"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":15098,"url":"http:\/\/bangla.sitestree.com\/?p=15098","url_meta":{"origin":19932,"position":3},"title":"Non Medical Determinants (for Health) Indicators: Health System Performance","author":"Sayed","date":"July 23, 2019","format":false,"excerpt":"Non Medical Determinants (for Health) Indicators: Health System Performance 'Alcohol Consumption: Adults', 'Fruit Consumption: Adults', 'Obesity Reported: Adults', 'Smoking: Adults (F)', 'Smoking: Adults (M)', 'Vegetable Consumption: Adults' Reference: A dataset from CIHI, Canada. 
Sayed Ahmed Linkedin: https:\/\/ca.linkedin.com\/in\/sayedjustetc Blog: http:\/\/sitestree.com, http:\/\/bangla.salearningschool.com","rel":"","context":"In &quot;Health System Performance&quot;","block_context":{"text":"Health System Performance","link":"http:\/\/bangla.sitestree.com\/?cat=1894"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":15099,"url":"http:\/\/bangla.sitestree.com\/?p=15099","url_meta":{"origin":19932,"position":4},"title":"Patient Safety Indicators: Health System Performance","author":"Sayed","date":"July 23, 2019","format":false,"excerpt":"Patient Safety Indicators: Health System Performance 'Foreign Body Left In', 'OB Trauma: Instrument', 'OB Trauma: No Instrument', 'Post-Op PE: Hip and Knee', 'Post-Op Sepsis: Abdominal' Reference: A dataset from CIHI, Canada. Sayed Ahmed Linkedin: https:\/\/ca.linkedin.com\/in\/sayedjustetc Blog: http:\/\/sitestree.com, http:\/\/bangla.salearningschool.com","rel":"","context":"In &quot;Health System Performance&quot;","block_context":{"text":"Health System Performance","link":"http:\/\/bangla.sitestree.com\/?cat=1894"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":15097,"url":"http:\/\/bangla.sitestree.com\/?p=15097","url_meta":{"origin":19932,"position":5},"title":"Access to Care Indicators: Health System Performance","author":"Sayed","date":"July 23, 2019","format":false,"excerpt":"Access to Care Indicators: Health System Performance 'Inability to Pay for Medical Bills', 'Poor Weekend\/Evening Care', 'Regular Doctor', 'Same or Next Day Appt', 'Wait Time: Cataract Surgery', 'Wait Time: Hip Replacement', 'Wait Time: Knee Replacement', 'Wait Time: Specialist' Reference: A dataset from CIHI, Canada Sayed Ahmed Linkedin: https:\/\/ca.linkedin.com\/in\/sayedjustetc Blog: http:\/\/sitestree.com,\u2026","rel":"","context":"In &quot;Access to Care&quot;","block_context":{"text":"Access to 
Care","link":"http:\/\/bangla.sitestree.com\/?cat=1899"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]}],"_links":{"self":[{"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=\/wp\/v2\/posts\/19932","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=19932"}],"version-history":[{"count":1,"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=\/wp\/v2\/posts\/19932\/revisions"}],"predecessor-version":[{"id":19933,"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=\/wp\/v2\/posts\/19932\/revisions\/19933"}],"wp:attachment":[{"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=19932"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=19932"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/bangla.sitestree.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=19932"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}