Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ISPRAS API
sdk.ruby
Commits
5bc2de9c
Commit
5bc2de9c
authored
Sep 12, 2016
by
alexlag
Browse files
use JSON in texterra NLP
parent
4667fe35
Changes
4
Hide whitespace changes
Inline
Side-by-side
lib/ispras-api/ispras_api.rb
View file @
5bc2de9c
require
'json'
require
'httparty'
require
'nori'
require_relative
'./api_error'
...
...
@@ -17,20 +18,42 @@ class IsprasAPI
end
end
def
GET
(
path
=
''
,
params
=
{})
options
=
{
query:
params
}
def
GET
(
path
=
''
,
params
=
{},
format
=
:xml
)
options
=
{
headers:
headers
(
format
),
query:
params
}
response
=
self
.
class
.
get
"/
#{
path
}
"
,
options
response
.
code
==
200
?
response
.
parsed_response
:
check_error
(
response
)
end
def
POST
(
path
=
''
,
params
=
{},
form
=
{})
options
=
{
query:
params
,
body:
form
}
def
POST
(
path
=
''
,
params
=
{},
body
=
{},
format
=
:xml
)
options
=
{
headers:
headers
(
format
),
query:
params
,
body:
body
}
response
=
self
.
class
.
post
"/
#{
path
}
"
,
options
response
.
code
==
200
?
response
.
parsed_response
:
check_error
(
response
)
end
private
def
headers
(
format
)
case
(
format
)
when
:json
{
'Accept'
=>
'application/json'
}
when
:xml
{
'Accept'
=>
'application/xml'
}
else
{}
end
end
def
check_error
(
response
)
fail
ApiError
,
"
#{
response
.
code
}
Error occured"
end
...
...
lib/ispras-api/texterra/nlp.rb
View file @
5bc2de9c
...
...
@@ -5,7 +5,7 @@ module TexterraNLP
# Detects language of given text
#
# @param [String] text Text to process
# @return [
Array
] Texterra
annotations
# @return [
Hash
] Texterra
document
def
language_detection_annotate
(
text
)
preset_nlp
(
:languageDetection
,
text
)
end
...
...
@@ -13,7 +13,7 @@ module TexterraNLP
# Detects boundaries of sentences in a given text
#
# @param [String] text Text to process
# @return [
Array
] Texterra
annotations
# @return [
Hash
] Texterra
document
def
sentence_detection_annotate
(
text
)
preset_nlp
(
:sentenceDetection
,
text
)
end
...
...
@@ -21,7 +21,7 @@ module TexterraNLP
# Detects all tokens (minimal significant text parts) in a given text
#
# @param [String] text Text to process
# @return [
Array
] Texterra
annotations
# @return [
Hash
] Texterra
document
def
tokenization_annotate
(
text
)
preset_nlp
(
:tokenization
,
text
)
end
...
...
@@ -29,7 +29,7 @@ module TexterraNLP
# Detects lemma of each word of a given text
#
# @param [String] text Text to process
# @return [
Array
] Texterra
annotations
# @return [
Hash
] Texterra
document
def
lemmatization_annotate
(
text
)
preset_nlp
(
:lemmatization
,
text
)
end
...
...
@@ -37,7 +37,7 @@ module TexterraNLP
# Detects part of speech tag for each word of a given text
#
# @param [String] text Text to process
# @return [
Array
] Texterra
annotations
# @return [
Hash
] Texterra
document
def
pos_tagging_annotate
(
text
)
preset_nlp
(
:posTagging
,
text
)
end
...
...
@@ -45,7 +45,7 @@ module TexterraNLP
# Tries to correct misprints and other spelling errors in a given text
#
# @param [String] text Text to process
# @return [
Array
] Texterra
annotations
# @return [
Hash
] Texterra
document
def
spelling_correction_annotate
(
text
)
preset_nlp
(
:spellingCorrection
,
text
)
end
...
...
@@ -53,7 +53,7 @@ module TexterraNLP
# Finds all named entity occurrences in a given text
#
# @param [String] text Text to process
# @return [
Array
] Texterra
annotations
# @return [
Hash
] Texterra
document
def
named_entities_annotate
(
text
)
preset_nlp
(
:namedEntities
,
text
)
end
...
...
@@ -61,7 +61,7 @@ module TexterraNLP
# Extracts non-overlapping terms within a given text; a term is a textual representation of some real-world concept
#
# @param [String] text Text to process
# @return [
Array
] Texterra
annotations
# @return [
Hash
] Texterra
document
def
term_detection_annotate
(
text
)
preset_nlp
(
:termDetection
,
text
)
end
...
...
@@ -69,7 +69,7 @@ module TexterraNLP
# Detects the most appropriate meanings (concepts) for terms occurring in a given text
#
# @param [String] text Text to process
# @return [
Array
] Texterra
annotations
# @return [
Hash
] Texterra
document
def
disambiguation_annotate
(
text
)
preset_nlp
(
:disambiguation
,
text
)
end
...
...
@@ -78,7 +78,7 @@ module TexterraNLP
# This service extracts a set of key concepts for a given text
#
# @param [String] text Text to process
# @return [
Array
] Texterra
annotations
# @return [
Hash
] Texterra
document
def
key_concepts_annotate
(
text
)
preset_nlp
(
:keyConcepts
,
text
)
end
...
...
@@ -88,7 +88,7 @@ module TexterraNLP
# If no domain from this list has been detected, the text is assumed to have no domain, or a general domain
#
# @param [String] text Text to process
# @return [
Array
] Texterra
annotations
# @return [
Hash
] Texterra
document
def
domain_detection_annotate
(
text
)
preset_nlp
(
:domainDetection
,
text
)
end
...
...
@@ -96,7 +96,7 @@ module TexterraNLP
# Detects whether the given text is subjective or not
#
# @param [String] text Text to process
# @return [
Array
] Texterra
annotations
# @return [
Hash
] Texterra
document
def
subjectivity_detection_annotate
(
text
)
preset_nlp
(
:subjectivityDetection
,
text
)
end
...
...
@@ -104,7 +104,7 @@ module TexterraNLP
# Detects whether the given text has positive, negative or no sentiment
#
# @param [String] text Text to process
# @return [
Array
] Texterra
annotations
# @return [
Hash
] Texterra
document
def
polarity_detection_annotate
(
text
)
preset_nlp
(
:polarityDetection
,
text
)
end
...
...
@@ -115,21 +115,22 @@ module TexterraNLP
#
# @param [String] text Text to process
# @param [String] domain Domain for polarity detection
# @return [
Array
] Texterra
annotations
# @return [
Hash
] Texterra
document
def
domain_polarity_detection_annotate
(
text
,
domain
=
''
)
specs
=
NLP_SPECS
[
:domainPolarityDetection
]
domain
=
"(
#{
domain
}
)"
unless
domain
.
empty?
result
=
POST
(
specs
[
:path
]
%
domain
,
specs
[
:params
],
text:
text
)[
:nlp_document
][
:annotations
][
:i_annotation
]
return
[]
if
result
.
nil?
result
=
[].
push
result
unless
result
.
is_a?
Array
result
.
map
{
|
e
|
assign_text
(
e
,
text
)
}
result
=
POST
(
specs
[
:path
]
%
domain
,
specs
[
:params
],
{
text:
text
},
:json
)
result
[
:annotations
].
each
do
|
key
,
value
|
value
.
map!
{
|
an
|
assign_text
(
an
,
text
)
}
end
result
end
# Detects Twitter-specific entities: Hashtags, User names, Emoticons, URLs.
# And also: Stop-words, Misspellings, Spelling suggestions, Spelling corrections
#
# @param [String] text Text to process
# @return [
Array
] Texterra
annotations
# @return [
Hash
] Texterra
document
def
tweet_normalization
(
text
)
preset_nlp
(
:tweetNormalization
,
text
)
end
...
...
@@ -137,11 +138,13 @@ module TexterraNLP
# Detects syntax relations in text. Only works for Russian texts
#
# @param [String] text Text to process
# @return [
Array
] Texterra
annotations
# @return [
Hash
] Texterra
document
def
syntax_detection
(
text
)
preset_nlp
(
:syntaxDetection
,
text
).
each
do
|
an
|
an
[
:value
][
:parent_token
]
=
assign_text
(
an
[
:value
][
:parent_token
],
text
)
if
an
[
:value
]
&&
an
[
:value
][
:parent_token
]
result
=
preset_nlp
(
:syntaxDetection
,
text
)
result
[
:annotations
][
:'syntax-relation'
].
each
do
|
an
|
an
[
:value
][
:parent
]
=
assign_text
(
an
[
:value
][
:parent
],
text
)
if
an
[
:value
]
&&
an
[
:value
][
:parent
]
end
result
end
private
...
...
@@ -149,10 +152,11 @@ module TexterraNLP
# Utility NLP part method
def
preset_nlp
(
methodName
,
text
)
specs
=
NLP_SPECS
[
methodName
]
result
=
POST
(
specs
[
:path
],
specs
[
:params
],
text:
text
)[
:nlp_document
][
:annotations
][
:i_annotation
]
return
[]
if
result
.
nil?
result
=
[].
push
result
unless
result
.
is_a?
Array
result
.
map
{
|
an
|
assign_text
(
an
,
text
)
}
result
=
POST
(
specs
[
:path
],
specs
[
:params
],
{
text:
text
},
:json
)
result
[
:annotations
].
each
do
|
key
,
value
|
value
.
map!
{
|
an
|
assign_text
(
an
,
text
)
}
end
result
end
# Utility text assignment for annotation
...
...
lib/ispras-api/texterra_api.rb
View file @
5bc2de9c
...
...
@@ -38,20 +38,15 @@ class TexterraAPI < IsprasAPI
# @param [String] text Text to process
# @return [Array] Array of weighted key concepts
def
key_concepts
(
text
)
key_concepts
=
key_concepts_annotate
(
text
)[
0
][
:value
][
:concepts_weights
][
:entry
]
||
[]
key_concepts
=
[].
push
key_concepts
unless
key_concepts
.
is_a?
Array
key_concepts
.
map
do
|
kc
|
kc
[
:concept
][
:weight
]
=
kc
[
:double
]
kc
[
:concept
]
end
key_concepts
=
key_concepts_annotate
(
text
)[
:annotations
][
:keyconcepts
][
0
][
:value
]
||
[]
end
# Detects whether the given text has positive, negative or no sentiment
#
# @param [String] text Text to process
# @return [
Array
] Sentiment of the text
# @return [
String
] Sentiment of the text
def
sentiment_analysis
(
text
)
polarity_detection_annotate
(
text
)[
0
][
:value
].
to_s
||
'NEUTRAL'
polarity_detection_annotate
(
text
)[
:annotations
][
:polarity
][
0
][
:value
].
to_s
||
'NEUTRAL'
rescue
NoMethodError
'NEUTRAL'
end
...
...
@@ -66,9 +61,11 @@ class TexterraAPI < IsprasAPI
def
domain_sentiment_analysis
(
text
,
domain
=
''
)
used_domain
=
'general'
sentiment
=
'NEUTRAL'
(
domain_polarity_detection_annotate
(
text
,
domain
)
||
[]).
each
do
|
an
|
sentiment
=
an
[
:value
]
if
an
[
:@class
].
include?
'SentimentPolarity'
used_domain
=
an
[
:value
]
if
an
[
:@class
].
include?
'DomainAnnotation'
annotations
=
domain_polarity_detection_annotate
(
text
,
domain
)[
:annotations
]
begin
used_domain
=
annotations
[
:domain
][
0
][
:value
]
sentiment
=
annotations
[
:polarity
][
0
][
:value
]
rescue
NoMethodError
end
{
domain:
used_domain
,
...
...
@@ -81,7 +78,7 @@ class TexterraAPI < IsprasAPI
# @param [String] text Text to process
# @return [Array] Texterra annotations
def
disambiguation
(
text
)
disambiguation_annotate
(
text
)
disambiguation_annotate
(
text
)
[
:annotations
][
:'disambiguated-phrase'
]
end
def
custom_query
(
path
,
query
,
form
=
nil
)
...
...
test/test_texterra_api.rb
View file @
5bc2de9c
...
...
@@ -44,61 +44,123 @@ class TestTexterraAPI < Minitest::Test
end
def
test_tweet_normalization
assert_instance_of
Array
,
@texterra
.
tweet_normalization
(
@en_tweet
)
assert_instance_of
Array
,
@texterra
.
tweet_normalization
(
@ru_tweet
)
res
=
@texterra
.
tweet_normalization
(
@en_tweet
)
assert_instance_of
Hash
,
res
assert_equal
@en_tweet
,
res
[
:text
]
res
=
@texterra
.
tweet_normalization
(
@ru_tweet
)
assert_instance_of
Hash
,
res
assert_equal
@ru_tweet
,
res
[
:text
]
end
def
test_syntax_detection
assert_instance_of
Array
,
@texterra
.
syntax_detection
(
@ru_text
)
res
=
@texterra
.
syntax_detection
(
@ru_text
)
assert_instance_of
Hash
,
res
assert_equal
@ru_text
,
res
[
:text
]
end
def
test_language_detection_annotate
assert_instance_of
Array
,
@texterra
.
language_detection_annotate
(
@en_text
)
assert_instance_of
Array
,
@texterra
.
language_detection_annotate
(
@ru_text
)
assert_instance_of
Array
,
@texterra
.
language_detection_annotate
(
@en_tweet
)
assert_instance_of
Array
,
@texterra
.
language_detection_annotate
(
@ru_tweet
)
res
=
@texterra
.
language_detection_annotate
(
@en_text
)
assert_instance_of
Hash
,
res
assert_equal
@en_text
,
res
[
:text
]
res
=
@texterra
.
language_detection_annotate
(
@ru_text
)
assert_instance_of
Hash
,
res
assert_equal
@ru_text
,
res
[
:text
]
res
=
@texterra
.
language_detection_annotate
(
@en_tweet
)
assert_instance_of
Hash
,
res
assert_equal
@en_tweet
,
res
[
:text
]
res
=
@texterra
.
language_detection_annotate
(
@ru_tweet
)
assert_instance_of
Hash
,
res
assert_equal
@ru_tweet
,
res
[
:text
]
end
def
test_sentence_detection_annotate
assert_instance_of
Array
,
@texterra
.
sentence_detection_annotate
(
@en_text
)
assert_instance_of
Array
,
@texterra
.
sentence_detection_annotate
(
@ru_text
)
assert_instance_of
Array
,
@texterra
.
sentence_detection_annotate
(
@en_tweet
)
assert_instance_of
Array
,
@texterra
.
sentence_detection_annotate
(
@ru_tweet
)
res
=
@texterra
.
sentence_detection_annotate
(
@en_text
)
assert_instance_of
Hash
,
res
assert_equal
@en_text
,
res
[
:text
]
res
=
@texterra
.
sentence_detection_annotate
(
@ru_text
)
assert_instance_of
Hash
,
res
assert_equal
@ru_text
,
res
[
:text
]
res
=
@texterra
.
sentence_detection_annotate
(
@en_tweet
)
assert_instance_of
Hash
,
res
assert_equal
@en_tweet
,
res
[
:text
]
res
=
@texterra
.
sentence_detection_annotate
(
@ru_tweet
)
assert_instance_of
Hash
,
res
assert_equal
@ru_tweet
,
res
[
:text
]
end
def
test_tokenization_annotate
assert_instance_of
Array
,
@texterra
.
tokenization_annotate
(
@en_text
)
assert_instance_of
Array
,
@texterra
.
tokenization_annotate
(
@ru_text
)
assert_instance_of
Array
,
@texterra
.
tokenization_annotate
(
@en_tweet
)
assert_instance_of
Array
,
@texterra
.
tokenization_annotate
(
@ru_tweet
)
res
=
@texterra
.
tokenization_annotate
(
@en_text
)
assert_instance_of
Hash
,
res
assert_equal
@en_text
,
res
[
:text
]
res
=
@texterra
.
tokenization_annotate
(
@ru_text
)
assert_instance_of
Hash
,
res
assert_equal
@ru_text
,
res
[
:text
]
res
=
@texterra
.
tokenization_annotate
(
@en_tweet
)
assert_instance_of
Hash
,
res
assert_equal
@en_tweet
,
res
[
:text
]
res
=
@texterra
.
tokenization_annotate
(
@ru_tweet
)
assert_instance_of
Hash
,
res
assert_equal
@ru_tweet
,
res
[
:text
]
end
def
test_lemmatization_annotate
assert_instance_of
Array
,
@texterra
.
lemmatization_annotate
(
@en_text
)
assert_instance_of
Array
,
@texterra
.
lemmatization_annotate
(
@ru_text
)
assert_instance_of
Array
,
@texterra
.
lemmatization_annotate
(
@en_tweet
)
assert_instance_of
Array
,
@texterra
.
lemmatization_annotate
(
@ru_tweet
)
res
=
@texterra
.
lemmatization_annotate
(
@en_text
)
assert_instance_of
Hash
,
res
assert_equal
@en_text
,
res
[
:text
]
res
=
@texterra
.
lemmatization_annotate
(
@ru_text
)
assert_instance_of
Hash
,
res
assert_equal
@ru_text
,
res
[
:text
]
res
=
@texterra
.
lemmatization_annotate
(
@en_tweet
)
assert_instance_of
Hash
,
res
assert_equal
@en_tweet
,
res
[
:text
]
res
=
@texterra
.
lemmatization_annotate
(
@ru_tweet
)
assert_instance_of
Hash
,
res
assert_equal
@ru_tweet
,
res
[
:text
]
end
def
test_pos_tagging_annotate
assert_instance_of
Array
,
@texterra
.
pos_tagging_annotate
(
@en_text
)
assert_instance_of
Array
,
@texterra
.
pos_tagging_annotate
(
@ru_text
)
assert_instance_of
Array
,
@texterra
.
pos_tagging_annotate
(
@en_tweet
)
assert_instance_of
Array
,
@texterra
.
pos_tagging_annotate
(
@ru_tweet
)
res
=
@texterra
.
pos_tagging_annotate
(
@en_text
)
assert_instance_of
Hash
,
res
assert_equal
@en_text
,
res
[
:text
]
res
=
@texterra
.
pos_tagging_annotate
(
@ru_text
)
assert_instance_of
Hash
,
res
assert_equal
@ru_text
,
res
[
:text
]
res
=
@texterra
.
pos_tagging_annotate
(
@en_tweet
)
assert_instance_of
Hash
,
res
assert_equal
@en_tweet
,
res
[
:text
]
res
=
@texterra
.
pos_tagging_annotate
(
@ru_tweet
)
assert_instance_of
Hash
,
res
assert_equal
@ru_tweet
,
res
[
:text
]
end
def
test_named_entities_annotate
assert_instance_of
Array
,
@texterra
.
named_entities_annotate
(
@en_text
)
assert_instance_of
Array
,
@texterra
.
named_entities_annotate
(
@ru_text
)
assert_instance_of
Array
,
@texterra
.
named_entities_annotate
(
@en_tweet
)
assert_instance_of
Array
,
@texterra
.
named_entities_annotate
(
@ru_tweet
)
res
=
@texterra
.
named_entities_annotate
(
@en_text
)
assert_instance_of
Hash
,
res
assert_equal
@en_text
,
res
[
:text
]
res
=
@texterra
.
named_entities_annotate
(
@ru_text
)
assert_instance_of
Hash
,
res
assert_equal
@ru_text
,
res
[
:text
]
res
=
@texterra
.
named_entities_annotate
(
@en_tweet
)
assert_instance_of
Hash
,
res
assert_equal
@en_tweet
,
res
[
:text
]
res
=
@texterra
.
named_entities_annotate
(
@ru_tweet
)
assert_instance_of
Hash
,
res
assert_equal
@ru_tweet
,
res
[
:text
]
end
def
test_subjectivity_detection_annotate
assert_instance_of
Array
,
@texterra
.
subjectivity_detection_annotate
(
@en_text
)
assert_instance_of
Array
,
@texterra
.
subjectivity_detection_annotate
(
@ru_text
)
assert_instance_of
Array
,
@texterra
.
subjectivity_detection_annotate
(
@en_tweet
)
assert_instance_of
Array
,
@texterra
.
subjectivity_detection_annotate
(
@ru_tweet
)
res
=
@texterra
.
subjectivity_detection_annotate
(
@en_text
)
assert_instance_of
Hash
,
res
assert_equal
@en_text
,
res
[
:text
]
res
=
@texterra
.
subjectivity_detection_annotate
(
@ru_text
)
assert_instance_of
Hash
,
res
assert_equal
@ru_text
,
res
[
:text
]
res
=
@texterra
.
subjectivity_detection_annotate
(
@en_tweet
)
assert_instance_of
Hash
,
res
assert_equal
@en_tweet
,
res
[
:text
]
res
=
@texterra
.
subjectivity_detection_annotate
(
@ru_tweet
)
assert_instance_of
Hash
,
res
assert_equal
@ru_tweet
,
res
[
:text
]
end
def
test_representation_terms
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment