# MWE Tokenizer List

# Multi-word expressions (MWEs) that should be merged into a single token.
# Each entry is a tuple of consecutive word tokens. Hyphenated and possessive
# forms are spelled as separate tokens (e.g. ('non', 'gaap'),
# ('company', 's', 'website')), which presumably mirrors how the upstream
# word tokenizer splits the text -- TODO confirm against the tokenization step.
#
# NOTE: every entry must be a real tuple of words. A parenthesised string such
# as ('going forward') is just a str, not a tuple, and would not be registered
# as a sequence of word tokens.
mwe_tokenizer = MWETokenizer([
    # Job titles and corporate roles
    ('chief', 'executive', 'officer'),
    ('chief', 'financial', 'officer'),
    ('chief', 'risk', 'officer'),
    ('chief', 'accounting', 'officer'),
    ('chief', 'investment', 'officer'),
    ('vice', 'president'),
    ('senior', 'vice', 'president'),
    ('executive', 'vice', 'president'),
    ('managing', 'director'),
    ('general', 'counsel'),
    ('board', 'of', 'directors'),
    ('investor', 'relations'),
    ('corporate', 'development'),
    ('corporate', 'banking'),
    ('wealth', 'management'),
    ('private', 'equity'),
    ('property', 'and', 'casualty'),
    ('wells', 'fargo'),
    ('north', 'america'),
    ('developing', 'countries'),

    # Reporting periods and time expressions
    ('full', 'year'),
    ('fiscal', 'year'),
    ('current', 'year'),
    ('first', 'quarter'),
    ('second', 'quarter'),
    ('third', 'quarter'),
    ('fourth', 'quarter'),
    ('year', 'over', 'year'),
    ('year', 'to', 'date'),
    ('earnings', 'period'),
    ('time', 'frame'),
    ('long', 'term'),
    ('real', 'time'),
    ('all', 'time'),        # was the bare string ('all-time')
    ('going', 'forward'),   # was the bare string ('going forward')
    ('forward', 'looking'),

    # Call / disclosure boilerplate
    ('earnings', 'call'),
    ('earnings', 'conference', 'call'),
    ('conference', 'call'),
    ('earnings', 'release'),
    ('earnings', 'reports'),
    ('press', 'release'),
    ('company', 's', 'website'),
    ('company', 'website'),
    ('actual', 'results'),
    ('thank', 'you'),
    ('good', 'morning'),
    ('good', 'afternoon'),

    # Financial metrics
    ('single', 'digit'),
    ('single', 'digits'),
    ('double', 'digit'),
    ('double', 'digits'),
    ('cash', 'flow'),
    ('free', 'cash', 'flow'),
    ('cash', 'benefit'),
    ('capital', 'expenditure'),
    ('capital', 'expenditures'),
    ('tax', 'rate'),
    ('effective', 'tax', 'rate'),
    ('interest', 'rate'),
    ('interest', 'rates'),
    ('run', 'rate'),
    ('redemption', 'rate'),
    ('adjusted', 'earnings', 'per', 'share'),
    ('adjusted', 'ebitda'),
    ('adjusted', 'net', 'income'),
    ('adjusted', 'income'),
    ('net', 'income'),
    ('net', 'debt'),
    ('debt', 'maturities'),
    ('non', 'gaap'),
    ('gaap', 'measures'),
    ('non', 'cash'),
    ('balance', 'sheet'),
    ('intangible', 'assets'),
    ('production', 'cost'),
    ('cost', 'savings'),
    ('operating', 'costs'),
    ('operating', 'expense'),
    ('operating', 'profits'),
    ('gross', 'margin'),
    ('gross', 'margins'),
    ('gross', 'sales'),
    ('gross', 'profit'),
    ('strong', 'margins'),
    ('average', 'selling', 'price'),
    ('top', 'line'),
    ('bottom', 'line'),
    ('financial', 'performance'),

    # Growth, sales and market language
    ('revenue', 'growth'),
    ('earnings', 'growth'),
    ('operational', 'growth'),
    ('sales', 'expectations'),
    ('sales', 'pipeline'),
    ('retail', 'sales'),
    ('same', 'store', 'sales'),
    ('same', 'store'),
    ('market', 'share'),
    ('market', 'leadership'),
    ('market', 'leading'),
    ('first', 'mover', 'advantage'),
    ('better', 'than', 'expected'),
    ('record', 'high'),
    ('high', 'performance'),
    ('global', 'footprint'),
    ('direct', 'to', 'consumer'),
    ('go', 'to', 'market'),
    ('end', 'to', 'end'),
    ('product', 'development'),
    ('know', 'how'),
    ('ramp', 'up'),
    ('fast', 'track'),
])

# Stop Word List

# Imported under an alias so that the final `stopwords` set defined at the
# bottom of this block does not shadow the NLTK corpus reader it is built from.
from nltk.corpus import stopwords as nltk_stopwords

# Baseline English stop words shipped with NLTK.
stop_words = set(nltk_stopwords.words('english'))

# Extra stop words added on top of the NLTK list. Underscore-joined entries
# such as 'chief_executive_officer' presumably correspond to tokens produced
# by the multi-word-expression tokenizer -- TODO confirm its separator.
# Duplicate entries from the earlier version of this list have been removed;
# the resulting `stopwords` set is unchanged.
new_stopwords = [
    'sort', 'actually', 'guess', 'yeah', 'probably', 'sure', 'feel', 'getting', 'seeing', 'mean', 'couple',
    'side', 'talked', 'talk', 'given', 'able', 'big', 'brian', 'scott', 'something', 'come', 'comes', 'coming',
    'seen', 'certainly', 'put', 'move', 'trying', 'continues', 'try', 'thing', 'give', 'might', 'anything', 'thinking',
    'executive_vice_president', 'ahead', 'add', 'saw', 'quite', 'different', 'taking', 'done', 'whether', 'always', 'ok',
    'less', 'need', 'follow', 'rates', 'quarters', 'early', 'help', 'wanted', 'far', 'perspective', 'open', 'already', 'understand',
    'talking', 'remark', 'remarks', 'major', 'step', 'plans', 'michael', 'course', 'place', 'wondering',
    'sense', 'color', 'process', 'hi', 'use', 'long', 'real', 'answer', 'low', 'capacity', 'making', 'appreciate', 'fact', 'numbers', 'relative',
    'director', 'space', 'longer', 'every', 'particularly', 'mark', 'amount', 'specific', 'view', 'comment', 'another', 'base', 'comments',
    'area', 'keep', 'ability', 'small', 'change', 'changes', 'build', 'areas', 'type', 'chief', 'tell', 'ask', 'came', 'kevin',
    'officer', 'risk', 'board', 'analyst', 'would', 'us', 'like', 'one', 'next', 'really', 'see', 'think', 'year',
    'question', 'going', 'well', 'get', 'you', 'right', 'chief_executive_officer', 'kind', 'look', 'thanks', 'thank_you',
    'little', 'bit', 'quarter', 'market', 'maybe', 'also', 'much', 'last', 'back', 'continue', 'president', 'say', 'new',
    'good', 'business', 'growth', 'know', 'terms', 'yes', 'could', 'got', 'go', 'first', 'things', 'expect',
    'years', 'obviously', 'guys', 'around', 'still', 'point', 'want', 'chief_financial_officer', 'time', 'operator',
    'line', 'said', 'two', 'take', 'number', 'looking', 'pretty', 'end', 'make', 'markets', 'mentioned', 'half',
    'capital', 'basis', 'okay', 'rate', 'call', 'part', 'good_morning', 'guidance', 'margin', 'overall', 'second',
    'way', 'today', 'chairman', 'questions', 'executive', 'thank', 'lot', 'vice', 'morning', 'financial', 'great', 'forward',
    'company', 'even', 'start', 'revenue', 'third', 'may', 'versus', 'term', 'three', 'value', 'fourth', 'fifth', 'level', 'sales',
    'please', 'let', 'months', 'cash',
]

# Final stop-word set: NLTK English stop words plus the additions above.
# NOTE: the name `stopwords` is kept because other code may rely on it;
# it is a plain set, not the NLTK corpus reader.
stopwords = stop_words.union(new_stopwords)