Skip to content

Commit caf250a

Browse files
added tfidf
1 parent 9aa7c48 commit caf250a

File tree

2 files changed

+223
-6
lines changed

2 files changed

+223
-6
lines changed

Diff for: Natural Language Processing/.ipynb_checkpoints/NLP Getting Started-checkpoint.ipynb

+111-3
Original file line number | Diff line number | Diff line change
@@ -3558,16 +3558,73 @@
35583558
},
35593559
{
35603560
"cell_type": "code",
3561-
"execution_count": 60,
3561+
"execution_count": 61,
35623562
"metadata": {},
35633563
"outputs": [
3564+
{
3565+
"name": "stdout",
3566+
"output_type": "stream",
3567+
"text": [
3568+
" (0, 11163)\t0.230266855924\n",
3569+
" (0, 10965)\t0.190734285451\n",
3570+
" (0, 8917)\t0.247046523768\n",
3571+
" (0, 8336)\t0.170468692922\n",
3572+
" (0, 7668)\t0.264033840655\n",
3573+
" (0, 7555)\t0.312538562607\n",
3574+
" (0, 6937)\t0.183469241361\n",
3575+
" (0, 6906)\t0.151584746647\n",
3576+
" (0, 6217)\t0.189155577328\n",
3577+
" (0, 5769)\t0.24984711893\n",
3578+
" (0, 5218)\t0.268705938625\n",
3579+
" (0, 5217)\t0.298351840882\n",
3580+
" (0, 4653)\t0.312538562607\n",
3581+
" (0, 2060)\t0.242039602564\n",
3582+
" (0, 1483)\t0.312538562607\n",
3583+
" (0, 1110)\t0.288286201631\n",
3584+
" (1, 11072)\t0.400615609824\n",
3585+
" (1, 10698)\t0.206363748132\n",
3586+
" (1, 8590)\t0.504340590131\n",
3587+
" (1, 7701)\t0.376740107081\n",
3588+
" (1, 3064)\t0.291199541124\n",
3589+
" (1, 2451)\t0.561988811929\n",
3590+
" (2, 11123)\t0.191043872205\n",
3591+
" (2, 11084)\t0.158981453472\n",
3592+
" (2, 10686)\t0.139955408208\n",
3593+
" :\t:\n",
3594+
" (5568, 6882)\t0.313674697762\n",
3595+
" (5568, 6691)\t0.477810764018\n",
3596+
" (5568, 6354)\t0.557572104865\n",
3597+
" (5568, 4880)\t0.385312208609\n",
3598+
" (5569, 10199)\t0.520467167164\n",
3599+
" (5569, 8252)\t0.432829970906\n",
3600+
" (5569, 3721)\t0.520467167164\n",
3601+
" (5569, 3228)\t0.520467167164\n",
3602+
" (5570, 11006)\t0.204345259945\n",
3603+
" (5570, 10787)\t0.228678434865\n",
3604+
" (5570, 9915)\t0.223802283762\n",
3605+
" (5570, 8420)\t0.226516757572\n",
3606+
" (5570, 7800)\t0.172438881848\n",
3607+
" (5570, 7394)\t0.307147523481\n",
3608+
" (5570, 7287)\t0.267866779355\n",
3609+
" (5570, 6984)\t0.264164044012\n",
3610+
" (5570, 6799)\t0.294185812624\n",
3611+
" (5570, 6699)\t0.200837653433\n",
3612+
" (5570, 6282)\t0.260770243908\n",
3613+
" (5570, 5251)\t0.302353515741\n",
3614+
" (5570, 5055)\t0.363572507445\n",
3615+
" (5570, 4508)\t0.347069257583\n",
3616+
" (5571, 10648)\t0.539218119882\n",
3617+
" (5571, 8348)\t0.485429154081\n",
3618+
" (5571, 3431)\t0.688187732787\n"
3619+
]
3620+
},
35643621
{
35653622
"data": {
35663623
"text/plain": [
3567-
"scipy.sparse.csr.csr_matrix"
3624+
"NoneType"
35683625
]
35693626
},
3570-
"execution_count": 60,
3627+
"execution_count": 61,
35713628
"metadata": {},
35723629
"output_type": "execute_result"
35733630
}
@@ -3577,6 +3634,57 @@
35773634
"type(print(messages_tfidf))"
35783635
]
35793636
},
3637+
{
3638+
"cell_type": "code",
3639+
"execution_count": 63,
3640+
"metadata": {},
3641+
"outputs": [
3642+
{
3643+
"data": {
3644+
"text/plain": [
3645+
"array([ 'Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...',\n",
3646+
" 'Ok lar... Joking wif u oni...',\n",
3647+
" \"Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's\",\n",
3648+
" ..., 'Pity, * was in mood for that. So...any other suggestions?',\n",
3649+
" \"The guy did some bitching but I acted like i'd be interested in buying something else next week and he gave it to us for free\",\n",
3650+
" 'Rofl. Its true to its name'], dtype=object)"
3651+
]
3652+
},
3653+
"execution_count": 63,
3654+
"metadata": {},
3655+
"output_type": "execute_result"
3656+
}
3657+
],
3658+
"source": [
3659+
"messages.Message.values"
3660+
]
3661+
},
3662+
{
3663+
"cell_type": "code",
3664+
"execution_count": 71,
3665+
"metadata": {},
3666+
"outputs": [
3667+
{
3668+
"data": {
3669+
"text/plain": [
3670+
"array([['ham'],\n",
3671+
" ['ham'],\n",
3672+
" ['spam'],\n",
3673+
" ..., \n",
3674+
" ['ham'],\n",
3675+
" ['ham'],\n",
3676+
" ['ham']], dtype=object)"
3677+
]
3678+
},
3679+
"execution_count": 71,
3680+
"metadata": {},
3681+
"output_type": "execute_result"
3682+
}
3683+
],
3684+
"source": [
3685+
" "
3686+
]
3687+
},
35803688
{
35813689
"cell_type": "code",
35823690
"execution_count": null,

Diff for: Natural Language Processing/NLP Getting Started.ipynb

+112-3
Original file line number | Diff line number | Diff line change
@@ -3558,16 +3558,73 @@
35583558
},
35593559
{
35603560
"cell_type": "code",
3561-
"execution_count": 60,
3561+
"execution_count": 61,
35623562
"metadata": {},
35633563
"outputs": [
3564+
{
3565+
"name": "stdout",
3566+
"output_type": "stream",
3567+
"text": [
3568+
" (0, 11163)\t0.230266855924\n",
3569+
" (0, 10965)\t0.190734285451\n",
3570+
" (0, 8917)\t0.247046523768\n",
3571+
" (0, 8336)\t0.170468692922\n",
3572+
" (0, 7668)\t0.264033840655\n",
3573+
" (0, 7555)\t0.312538562607\n",
3574+
" (0, 6937)\t0.183469241361\n",
3575+
" (0, 6906)\t0.151584746647\n",
3576+
" (0, 6217)\t0.189155577328\n",
3577+
" (0, 5769)\t0.24984711893\n",
3578+
" (0, 5218)\t0.268705938625\n",
3579+
" (0, 5217)\t0.298351840882\n",
3580+
" (0, 4653)\t0.312538562607\n",
3581+
" (0, 2060)\t0.242039602564\n",
3582+
" (0, 1483)\t0.312538562607\n",
3583+
" (0, 1110)\t0.288286201631\n",
3584+
" (1, 11072)\t0.400615609824\n",
3585+
" (1, 10698)\t0.206363748132\n",
3586+
" (1, 8590)\t0.504340590131\n",
3587+
" (1, 7701)\t0.376740107081\n",
3588+
" (1, 3064)\t0.291199541124\n",
3589+
" (1, 2451)\t0.561988811929\n",
3590+
" (2, 11123)\t0.191043872205\n",
3591+
" (2, 11084)\t0.158981453472\n",
3592+
" (2, 10686)\t0.139955408208\n",
3593+
" :\t:\n",
3594+
" (5568, 6882)\t0.313674697762\n",
3595+
" (5568, 6691)\t0.477810764018\n",
3596+
" (5568, 6354)\t0.557572104865\n",
3597+
" (5568, 4880)\t0.385312208609\n",
3598+
" (5569, 10199)\t0.520467167164\n",
3599+
" (5569, 8252)\t0.432829970906\n",
3600+
" (5569, 3721)\t0.520467167164\n",
3601+
" (5569, 3228)\t0.520467167164\n",
3602+
" (5570, 11006)\t0.204345259945\n",
3603+
" (5570, 10787)\t0.228678434865\n",
3604+
" (5570, 9915)\t0.223802283762\n",
3605+
" (5570, 8420)\t0.226516757572\n",
3606+
" (5570, 7800)\t0.172438881848\n",
3607+
" (5570, 7394)\t0.307147523481\n",
3608+
" (5570, 7287)\t0.267866779355\n",
3609+
" (5570, 6984)\t0.264164044012\n",
3610+
" (5570, 6799)\t0.294185812624\n",
3611+
" (5570, 6699)\t0.200837653433\n",
3612+
" (5570, 6282)\t0.260770243908\n",
3613+
" (5570, 5251)\t0.302353515741\n",
3614+
" (5570, 5055)\t0.363572507445\n",
3615+
" (5570, 4508)\t0.347069257583\n",
3616+
" (5571, 10648)\t0.539218119882\n",
3617+
" (5571, 8348)\t0.485429154081\n",
3618+
" (5571, 3431)\t0.688187732787\n"
3619+
]
3620+
},
35643621
{
35653622
"data": {
35663623
"text/plain": [
3567-
"scipy.sparse.csr.csr_matrix"
3624+
"NoneType"
35683625
]
35693626
},
3570-
"execution_count": 60,
3627+
"execution_count": 61,
35713628
"metadata": {},
35723629
"output_type": "execute_result"
35733630
}
@@ -3577,6 +3634,58 @@
35773634
"type(print(messages_tfidf))"
35783635
]
35793636
},
3637+
{
3638+
"cell_type": "code",
3639+
"execution_count": 72,
3640+
"metadata": {},
3641+
"outputs": [
3642+
{
3643+
"data": {
3644+
"text/plain": [
3645+
"array([ 'Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...',\n",
3646+
" 'Ok lar... Joking wif u oni...',\n",
3647+
" \"Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's\",\n",
3648+
" ..., 'Pity, * was in mood for that. So...any other suggestions?',\n",
3649+
" \"The guy did some bitching but I acted like i'd be interested in buying something else next week and he gave it to us for free\",\n",
3650+
" 'Rofl. Its true to its name'], dtype=object)"
3651+
]
3652+
},
3653+
"execution_count": 72,
3654+
"metadata": {},
3655+
"output_type": "execute_result"
3656+
}
3657+
],
3658+
"source": [
3659+
"# get the response variables\n",
3660+
"messages.Message.values"
3661+
]
3662+
},
3663+
{
3664+
"cell_type": "code",
3665+
"execution_count": 71,
3666+
"metadata": {},
3667+
"outputs": [
3668+
{
3669+
"data": {
3670+
"text/plain": [
3671+
"array([['ham'],\n",
3672+
" ['ham'],\n",
3673+
" ['spam'],\n",
3674+
" ..., \n",
3675+
" ['ham'],\n",
3676+
" ['ham'],\n",
3677+
" ['ham']], dtype=object)"
3678+
]
3679+
},
3680+
"execution_count": 71,
3681+
"metadata": {},
3682+
"output_type": "execute_result"
3683+
}
3684+
],
3685+
"source": [
3686+
" "
3687+
]
3688+
},
35803689
{
35813690
"cell_type": "code",
35823691
"execution_count": null,

0 commit comments

Comments
 (0)