In [1]:
from textblob import TextBlob, Word

Tokenize

In [3]:
# Short sample that mixes a contraction ("Let's"), a dotted abbreviation ("U.S.A.") and a hashtag.
doc = TextBlob(u"Let's make the U.S.A. great again! Whadya say, Bob? #blessed")
In [8]:
# Word tokens only: punctuation and the '#' are dropped, and "U.S.A." loses its final period.
doc.words
Out[8]:
WordList(['Let', "'s", 'make', 'the', 'U.S.A', 'great', 'again', 'Whadya', 'say', 'Bob', 'blessed'])

POS tagging

In [4]:
# (token, POS tag) pairs; punctuation tokens are omitted.
# Note that the "'s" of "Let's" comes back tagged 'POS' even though it stands for "us" here.
doc.tags
Out[4]:
[('Let', 'VB'),
 ("'s", 'POS'),
 ('make', 'VB'),
 ('the', 'DT'),
 ('U.S.A.', 'NNP'),
 ('great', 'JJ'),
 ('again', 'RB'),
 ('Whadya', 'NNP'),
 ('say', 'VBP'),
 ('Bob', 'NNP'),
 ('blessed', 'VBD')]

Sentence segmentation

In [2]:
# A multi-sentence movie review, kept verbatim; its misspellings ("Altough", "Austing") are
# exercised by the spell-correction cell further down.
review = TextBlob("It's the 21st century, and a group of space marines are sent to destroy a monster that terrorizes the entire galaxy, the ultimate threat... A Leprechaun! This was a very funny movie, with the Leprechaun teaming up with a dastardly space princess who wants the Leprechaun's gold. Together the killer their way through a group of hilarious characters (but not as hilarious as in Leprechaun 3). Especially the Doctor, which Dr. Evil (From Austin Powers) resembles. Altough this came out in February '97 while the first Austing came out in May '97. Anyway, this was the 4th highest renting horror movie of '97. This is the second best Lep movie, following behind #3. Both of this were directed by Brian Trenchard-Smith, and I think he should direct Leprechaun 5: Lep In The Hood (Which is going to be theatrical and have more comedy than horror, it will star Warwick Davis and Ice-T and will be set in an inner city Los Angeles neighborhood. It will film in late summer, '99.")
In [9]:
# Sentence splitting is imperfect: the abbreviation "Dr." is treated as a sentence end,
# so "Dr. Evil" is cut across two Sentence objects.
review.sentences
Out[9]:
[Sentence("It's the 21st century, and a group of space marines are sent to destroy a monster that terrorizes the entire galaxy, the ultimate threat... A Leprechaun!"),
 Sentence("This was a very funny movie, with the Leprechaun teaming up with a dastardly space princess who wants the Leprechaun's gold."),
 Sentence("Together the killer their way through a group of hilarious characters (but not as hilarious as in Leprechaun 3)."),
 Sentence("Especially the Doctor, which Dr."),
 Sentence("Evil (From Austin Powers) resembles."),
 Sentence("Altough this came out in February '97 while the first Austing came out in May '97."),
 Sentence("Anyway, this was the 4th highest renting horror movie of '97."),
 Sentence("This is the second best Lep movie, following behind #3."),
 Sentence("Both of this were directed by Brian Trenchard-Smith, and I think he should direct Leprechaun 5: Lep In The Hood (Which is going to be theatrical and have more comedy than horror, it will star Warwick Davis and Ice-T and will be set in an inner city Los Angeles neighborhood."),
 Sentence("It will film in late summer, '99.")]

Noun phrases

In [5]:
# Noun phrases come back lower-cased; the result also contains non-nouns such as
# 'together', 'especially' and 'anyway', so treat it as a rough extraction.
review.noun_phrases
Out[5]:
WordList(['space marines', 'entire galaxy', 'ultimate threat ...', 'leprechaun', 'funny movie', 'leprechaun', 'space princess', 'leprechaun', 'together', 'hilarious characters', 'leprechaun', 'especially', 'dr', 'evil', 'austin powers', 'altough', 'february', 'austing', 'may', 'anyway', 'horror movie', 'lep', 'brian trenchard-smith', 'leprechaun', 'lep', 'warwick davis', 'ice-t', 'inner city', 'los angeles', 'late summer'])

Sentiment analysis

The sentiment analyzer returns a polarity score (from -1, negative, to +1, positive) and a subjectivity score (from 0, objective, to 1, subjective).

In [6]:
# Sentiment for the whole review, returned as Sentiment(polarity, subjectivity).
review.sentiment
Out[6]:
Sentiment(polarity=0.07375, subjectivity=0.49124999999999985)
In [7]:
# Each field of the Sentiment result can be read as an attribute.
review.sentiment.polarity
Out[7]:
0.07375

Word forms / lemmas

In [16]:
# Take the opening sentence of the review, show it and its tokens, then inflect two
# nouns picked by position: index 10 is 'marines' (made singular) and index 16 is
# 'monster' (made plural).
sentence = review.sentences[0]
print(
    sentence,
    sentence.words,
    sentence.words[10].singularize(),
    sentence.words[16].pluralize(),
    sep="\n",
)
It's the 21st century, and a group of space marines are sent to destroy a monster that terrorizes the entire galaxy, the ultimate threat... A Leprechaun!
['It', "'s", 'the', '21st', 'century', 'and', 'a', 'group', 'of', 'space', 'marines', 'are', 'sent', 'to', 'destroy', 'a', 'monster', 'that', 'terrorizes', 'the', 'entire', 'galaxy', 'the', 'ultimate', 'threat', 'A', 'Leprechaun']
marine
monsters
In [19]:
# `Word` is imported together with `TextBlob` in the notebook's first cell.
# Without a part-of-speech argument lemmatize() treats the word as a noun, so this
# verb form comes back unchanged.
Word("terrorizes").lemmatize()
Out[19]:
'terrorizes'
In [21]:
Word("cacti").lemmatize() # some irregular plurals are handled: 'cacti' -> 'cactus'
Out[21]:
'cactus'
In [24]:
Word("fungi").lemmatize() # ...but not all of them: 'fungi' is returned unchanged
Out[24]:
'fungi'
In [22]:
Word("sent").lemmatize() # no POS given, so the verb form 'sent' is returned unchanged
Out[22]:
'sent'
In [23]:
Word("sent").lemmatize("v") # pass "v" to lemmatize as a verb: 'sent' -> 'send'
Out[23]:
'send'

Spell "correction"

In [25]:
# Sentence 5 contains two misspellings ("Altough", "Austing"); print it as written and
# then after correct() so the two versions can be compared line by line.
sentence5 = review.sentences[5]
print(sentence5, sentence5.correct(), sep="\n")
Altough this came out in February '97 while the first Austing came out in May '97.
Although this came out in February '97 while the first Dusting came out in May '97.

Translation / language detection

In [28]:
# Reuses `sentence` (the first sentence of the review) defined in the word-forms cell.
# NOTE(review): translate() goes through the Google Translate web API, so it needs network
# access; it was deprecated in TextBlob 0.16.0 and removed in 0.18.0 — confirm the
# installed version still provides it.
en_blob = sentence
en_blob.translate(to='es')
Out[28]:
Sentence("Es el siglo XXI, y un grupo de marines espaciales son enviados para destruir a un monstruo que aterroriza a toda la galaxia, la última amenaza ... ¡Un duende!")
In [29]:
# Language detection on an Arabic sample.
# NOTE(review): detect_language() relies on the Google Translate web API (network required)
# and was deprecated in TextBlob 0.16.0 and removed in 0.18.0 — confirm the installed
# version still provides it.
b = TextBlob(u"بسيط هو أفضل من مجمع")
b.detect_language()
Out[29]:
'ar'

Shallow parse (POS tags and chunks)

In [30]:
# Print the raw text followed by its parse. parse() returns a string with one line per
# sentence; each token is rendered as word/POS-tag/chunk-tag/... (chunk tags use B-/I-/O
# prefixes; the fourth field is presumably the prepositional-phrase tag — TODO confirm).
print(doc, doc.parse(), sep="\n")
Let's make the U.S.A. great again! Whadya say, Bob? #blessed
Let/VB/B-VP/O '/POS/O/O s/PRP/B-NP/O make/VB/B-VP/O the/DT/B-NP/O U.S.A./NNP/I-NP/O great/JJ/B-ADJP/O again/RB/B-ADVP/O !/./O/O
Whadya/NNP/B-NP/O say/VBP/B-VP/O ,/,/O/O Bob/NNP/B-NP/O ?/./O/O
#/#/O/O blessed/VBN/B-VP/O