Commit 714e5c78 authored by alexlag
Browse files

KBM+test added, minor fixes

parent ba35dfc7
require_relative './kbm_specs'
module TexterraKBM
  include TexterraKBMSpecs

  # Determines if Knowledge base contains the specified term
  #
  # @param term [String] term
  # @return [Hash] with :presence field
  def term_presence(term)
    presetKBM :termPresence, term
  end

  # Returns information measure for the given term. Information measure denotes how often
  # the given term is used as a link caption among all its occurrences
  #
  # @param term [String] term
  # @return [Hash] with :infomeasure field
  def term_info_measure(term)
    presetKBM :termInfoMeasure, term
  end

  # Returns concepts resource from the Knowledge base corresponding to the found meanings of the given term
  #
  # @param term [String] term
  # @return [Hash] with :elements field
  def term_meanings(term)
    presetKBM :termMeanings, term
  end

  # If concept isn't provided, returns concepts with their commonness, corresponding to the found
  # meanings of the given term. Commonness denotes how often the given term is associated with the
  # given concept.
  # With concept (format is {id}:{kbname}) returns commonness of the given concept for the given term.
  #
  # @param term [String] term
  # @param concept [String] concept as {id}:{kbname}
  # @return [Hash] with :elements field
  def term_commonness(term, concept='')
    # An empty concept leaves the second path placeholder blank
    concept_param = concept.empty? ? concept : "id=#{concept}"
    presetKBM :termCommonness, [term, concept_param]
  end

  # Returns neighbour concepts for the given concepts (list or single concept, each concept is {id}:{kbname}).
  # If at least one traverse parameter (check REST Documentation for values) is specified,
  # all other parameters should also be specified
  #
  # @param concepts [String, Array] either concept as {id}:{kbname} or array of such concepts
  # @param traverse_params [Hash] optional, should contain :linkType, :nodeType, :minDepth, :maxDepth keys with values
  # @return [Hash] with :elements field
  def neighbours(concepts, traverse_params={})
    presetKBM :neighbours, [wrap_concepts(concepts), traverse_matrix(traverse_params)]
  end

  # Returns neighbour concepts size for the given concepts (list or single concept, each concept is {id}:{kbname}).
  # If at least one traverse parameter (check REST Documentation for values) is specified,
  # all other parameters should also be specified
  #
  # @param concepts [String, Array] either concept as {id}:{kbname} or array of such concepts
  # @param traverse_params [Hash] optional, should contain :linkType, :nodeType, :minDepth, :maxDepth keys with values
  # @return [Hash] with :size field
  def neighbours_size(concepts, traverse_params={})
    # Reuses the :neighbours path spec and appends the "/size" sub-resource to it
    presetKBM :neighbours, [wrap_concepts(concepts), "#{traverse_matrix(traverse_params)}/size"]
  end

  # Computes similarity for each pair of concepts (list or single concept, each concept is {id}:{kbname}).
  #
  # @param concepts [Array] array of concepts as {id}:{kbname}
  # @param linkWeight [String] specifies method for computation of link weight in case of multiple link types - check REST Documentation for values
  def similarity_graph(concepts, linkWeight='MAX')
    presetKBM :similarityGraph, weighted_concepts(concepts, linkWeight)
  end

  # Computes sum of similarities from each concept of the first list to all concepts of the second one
  # (each list may be a list or a single concept, each concept is {id}:{kbname}).
  #
  # @param first_concepts [Array] array of concepts as {id}:{kbname}
  # @param second_concepts [Array] array of concepts as {id}:{kbname}
  # @param linkWeight [String] specifies method for computation of link weight in case of multiple link types - check REST Documentation for values
  def all_pairs_similarity(first_concepts, second_concepts, linkWeight='MAX')
    presetKBM :allPairsSimilarity, [weighted_concepts(first_concepts, linkWeight), wrap_concepts(second_concepts)]
  end

  # Computes similarity from each concept of the first list to all concepts of the second list as a whole.
  # Links of second list concepts (each concept is {id}:{kbname}) are collected together, thus forming
  # a "virtual" article, similarity to which is computed.
  #
  # @param concepts [Array] array of concepts as {id}:{kbname}
  # @param virtual_aricle [Array] array of concepts as {id}:{kbname}
  # @param linkWeight [String] specifies method for computation of link weight in case of multiple link types - check REST Documentation for values
  def similarity_to_virtual_article(concepts, virtual_aricle, linkWeight='MAX')
    presetKBM :similarityToVirtualArticle, [weighted_concepts(concepts, linkWeight), wrap_concepts(virtual_aricle)]
  end

  # Computes similarity between two sets of concepts (list or single concept, each concept is {id}:{kbname})
  # as between "virtual" articles from these sets.
  # The links of each virtual article are composed of links of the collection of concepts.
  #
  # @param first_virtual_aricle [Array] array of concepts as {id}:{kbname}
  # @param second_virtual_article [Array] array of concepts as {id}:{kbname}
  # @param linkWeight [String] specifies method for computation of link weight in case of multiple link types - check REST Documentation for values
  def similarity_between_virtual_articles(first_virtual_aricle, second_virtual_article, linkWeight='MAX')
    presetKBM :similarityBetweenVirtualArticle, [weighted_concepts(first_virtual_aricle, linkWeight), wrap_concepts(second_virtual_article)]
  end

  # Searches for similar concepts among the first neighbours of the given ones
  # (list or single concept, each concept is {id}:{kbname}).
  #
  # @param concepts [String, Array] concept as {id}:{kbname} or array of such concepts
  # @param params [Hash] check REST Documentation for values
  #   => :linkWeight [String] specifies method for computation of link weight in case of multiple link types, 'MAX' when omitted
  #   => :offset [Integer] provides a possibility to skip several concepts from the start of the result
  #   => :limit [Integer] provides a possibility to limit size of result
  def similar_over_first_neighbours(concepts, params={linkWeight:'MAX'})
    # Fall back to 'MAX' when the caller supplies params without :linkWeight
    link_weight = params[:linkWeight] || 'MAX'
    presetKBM :similarOverFirstNeighbours, weighted_concepts(concepts, link_weight), params
  end

  # Searches for similar concepts over filtered set of the first and the second neighbours of the given ones
  # (list or single concept, each concept is {id}:{kbname}).
  #
  # @param concepts [String, Array] concept as {id}:{kbname} or array of such concepts
  # @param params [Hash] check REST Documentation for values
  #   => :linkWeight [String] specifies method for computation of link weight in case of multiple link types, 'MAX' when omitted
  #   => :offset [Integer] provides a possibility to skip several concepts from the start of the result
  #   => :limit [Integer] provides a possibility to limit size of result
  #   => :among [SimilarCandidatesFilter] specifies how to filter neighbour concepts when searching for most similar
  def similar_over_filtered_neighbours(concepts, params={linkWeight:'MAX'})
    # Fall back to 'MAX' when the caller supplies params without :linkWeight
    link_weight = params[:linkWeight] || 'MAX'
    presetKBM :similarOverFilteredNeighbours, weighted_concepts(concepts, link_weight), params
  end

  # Gets attributes for concepts (list or single concept, each concept is {id}:{kbname})
  #
  # @param concepts [String, Array] either concept as {id}:{kbname} or array of such concepts
  # @param attributes [Array] specifies attributes to be included into response
  #   => Supported attributes:
  #      coordinates - GPS coordinates
  #      definition - brief concept definition
  #      url(<language>) - URL to page with description of the given concept on the specified language
  #        <language> - language code, like: en, de, fr, ko, ru, ...
  #      synonym - different textual representations of the concept
  #      title - concept title
  #      translation(<language>) - textual representation of the concept on the specified language
  #        <language> - language code, like: en, de, fr, ko, ru, ...
  #      type - concept type
  def get_attributes(concepts, attributes=[])
    presetKBM :getAttributes, wrap_concepts(concepts), attribute: attributes
  end

  private

  # Utility wrapper for matrix parameters: renders every concept as "id=<concept>;"
  def wrap_concepts(concepts)
    Array(concepts).map { |concept| "id=#{concept};" }.join
  end

  # Concept matrix parameters followed by the linkWeight matrix parameter.
  # wrap_concepts already ends with ';', so no extra separator is inserted.
  def weighted_concepts(concepts, link_weight)
    "#{wrap_concepts(concepts)}linkWeight=#{link_weight}"
  end

  # Renders traverse parameters as ";name=value" pairs; empty string for an empty hash
  def traverse_matrix(traverse_params)
    traverse_params.map { |name, value| ";#{name}=#{value}" }.join
  end

  # Utility KBM part method: looks up the spec for methodName, fills its path
  # template with pathParam and issues a GET with the combined query parameters
  def presetKBM(methodName, pathParam, queryParam={})
    specs = KBMSpecs[methodName]
    # Hash#merge is non-destructive, so its result has to be the value passed on
    GET(specs[:path] % pathParam, queryParam.merge(specs[:params]))
  end
end
\ No newline at end of file
......@@ -44,6 +44,10 @@ module TexterraKBMSpecs
similarOverFilteredNeighbours: {
path: 'similarity/%s/similar/all',
params: {}
},
getAttributes: {
path: 'walker/%s',
params: {}
}
}
end
\ No newline at end of file
......@@ -109,14 +109,6 @@ module TexterraNLP
presetNLP(:polarityDetection, text)
end
# Extracts aspect-sentiment pairs from the given text. Currently only movie domain is supported
#
# @param text [String] text to process
# @return [Array] Texterra annotations
def aspect_extraction_annotate(text)
  # Delegates to the shared NLP helper with the :aspectExtraction spec
  presetNLP :aspectExtraction, text
end
# Detects whether the given text has positive, negative, or no sentiment, with respect to domain.
# If domain isn't provided, Domain detection is applied, this way method tries to achieve best results.
# If no domain is detected general domain algorithm is applied
......@@ -127,6 +119,7 @@ module TexterraNLP
specs = NLPSpecs[:domainPolarityDetection]
domain = '(%s)' % domain unless domain.empty?
result = POST(specs[:path] % domain, specs[:params], {text: text})[:nlp_document][:annotations][:i_annotation]
return [] if result.nil?
result = [].push result unless result.is_a? Array
result.each do |e|
st, en = e[:start].to_i, e[:end].to_i
......@@ -149,6 +142,7 @@ module TexterraNLP
def presetNLP(methodName, text)
specs = NLPSpecs[methodName]
result = POST(specs[:path], specs[:params], {text: text})[:nlp_document][:annotations][:i_annotation]
return [] if result.nil?
result = [].push result unless result.is_a? Array
result.each do |e|
st, en = e[:start].to_i, e[:end].to_i
......
......@@ -24,6 +24,7 @@ class TexterraAPI < IsprasAPI
# @return [Array] list of weighted key concepts
def key_concepts(text)
key_concepts = key_concepts_annotate(text)[0][:value][:concepts_weights][:entry] || []
key_concepts = [].push key_concepts unless key_concepts.is_a? Array
key_concepts.map { |kc|
kc[:concept][:weight] = kc[:double]
kc[:concept]
......@@ -62,20 +63,6 @@ class TexterraAPI < IsprasAPI
}
end
# Extracts aspect-sentiment pairs from the given text. Currently only movie domain is supported
#
# @param text [String] text to process
# @return [Array] list of found aspects
def aspect_extraction(text)
  # A nil annotation result is treated as an empty list
  (aspect_extraction_annotate(text) || []).map do |annotation|
    # Flatten each annotation into a plain hash of text, aspect and polarity
    {
      text: annotation[:text],
      aspect: annotation[:value][:aspect],
      polarity: annotation[:value][:polarity]
    }
  end
end
# Detects the most appropriate meanings (concepts) for terms occurred in a given text
#
# @param text [String] text to process
......@@ -84,6 +71,10 @@ class TexterraAPI < IsprasAPI
disambiguation_annotate(text)
end
# Sends an arbitrary request: a GET when no form is given, otherwise a POST carrying the form
#
# @param path [String] request path
# @param query [Hash] query parameters
# @param form [Hash, nil] optional form data; its presence switches the request to POST
def custom_query(path, query, form=nil)
  return GET(path, query) if form.nil?

  POST(path, query, form)
end
private
def check_error(response)
......
......@@ -7,7 +7,7 @@ class TestTexterraAPI < Minitest::Test
def setup
@texterra = TexterraAPI.new ENV['KEY'], ENV['SERVICE_NAME'], ENV['SERVICE_VERSION']
@en_text = 'Apple today updated iMac to bring numerous high-performance enhancements to the =s leading all-in-one desktop. iMac now features fourth-generation Intel Core processors, new graphics, and next-generation Wi-Fi. In addition, it now supports PCIe-based flash storage, making its Fusion Drive and all-flash storage options up to 50 percent faster than the previous generation'
@en_text = 'Apple today updated iMac to bring numerous high-performance enhancements to the leading all-in-one desktop. iMac now features fourth-generation Intel Core processors, new graphics, and next-generation Wi-Fi. In addition, it now supports PCIe-based flash storage, making its Fusion Drive and all-flash storage options up to 50 percent faster than the previous generation'
@ru_text = 'Первые в этом году переговоры министра иностранных дел России Сергея Лаврова и госсекретаря США Джона Керри, длившиеся 1,5 часа, завершились в Мюнхене.'
@en_tweet = 'mentioning veterens care which Mccain has voted AGAINST - SUPER GOOOOD point Obama+1 #tweetdebate'
@ru_tweet = 'В мастерской готовят пушку и автомобили 1940-х годов, для участия в Параде Победы в Ново-Переделкино.'
......@@ -16,6 +16,8 @@ class TestTexterraAPI < Minitest::Test
# key_concepts must return an Array for each of the four sample texts
def test_key_concepts
  [@en_text, @ru_text, @en_tweet, @ru_tweet].each do |sample|
    assert_instance_of Array, @texterra.key_concepts(sample)
  end
end
def test_disambiguation
......@@ -30,11 +32,112 @@ class TestTexterraAPI < Minitest::Test
assert_instance_of String, @texterra.sentiment_analysis(@ru_tweet)
end
# Domain sentiment analysis: Hash results without a domain, the requested domain echoed
# back for the English tweet, and an ApiError for the Russian text with 'politics'
def test_domain_sentiment_analysis
  [@en_text, @ru_text].each do |sample|
    assert_instance_of Hash, @texterra.domain_sentiment_analysis(sample)
  end

  political = @texterra.domain_sentiment_analysis(@en_tweet, 'politics')
  assert_instance_of Hash, political
  assert_equal 'politics', political[:domain]

  assert_raises(ApiError) { @texterra.domain_sentiment_analysis(@ru_text, 'politics') }
end
# Tweet normalization returns an Array for the English tweet and is expected
# to raise ApiError inside the block that submits the Russian samples
def test_tweet_normalization
  normalized = @texterra.tweet_normalization(@en_tweet)
  assert_instance_of Array, normalized

  assert_raises(ApiError) do
    @texterra.tweet_normalization(@ru_text)
    @texterra.tweet_normalization(@ru_tweet)
  end
end
# language_detection_annotate must return an Array for each of the four sample texts
def test_language_detection_annotate
  [@en_text, @ru_text, @en_tweet, @ru_tweet].each do |sample|
    assert_instance_of Array, @texterra.language_detection_annotate(sample)
  end
end
# named_entities_annotate must return an Array for each of the four sample texts
def test_named_entities_annotate
  [@en_text, @ru_text, @en_tweet, @ru_tweet].each do |sample|
    assert_instance_of Array, @texterra.named_entities_annotate(sample)
  end
end
# subjectivity_detection_annotate must return an Array for each of the four sample texts
def test_subjectivity_detection_annotate
  [@en_text, @ru_text, @en_tweet, @ru_tweet].each do |sample|
    assert_instance_of Array, @texterra.subjectivity_detection_annotate(sample)
  end
end
# term_presence for 'Anarchism' must be a Hash whose :presence entry equals true
def test_term_presence
  presence_info = @texterra.term_presence('Anarchism')

  assert_instance_of Hash, presence_info
  assert_equal true, presence_info[:presence]
end
# term_info_measure for 'Anarchism' must return a Hash
def test_term_info_measure
  measure = @texterra.term_info_measure('Anarchism')
  assert_instance_of Hash, measure
end
# term_meanings for 'android' must return a Hash
def test_term_meanings
  meanings = @texterra.term_meanings('android')
  assert_instance_of Hash, meanings
end
# term_commonness must return a Hash both without and with an explicit concept
def test_term_commonness
  term = 'android'

  assert_instance_of Hash, @texterra.term_commonness(term)
  assert_instance_of Hash, @texterra.term_commonness(term, '713:enwiki')
end
# neighbours must return a Hash for a single concept and for a concept list,
# each queried first without and then with the full set of traverse parameters
def test_neignbours
  traverse = { linkType: 'RELATED', nodeType: 'REGULAR', minDepth: 1, maxDepth: 3 }

  ['12:enwiki', ['12:enwiki', '713:enwiki']].each do |concepts|
    assert_instance_of Hash, @texterra.neighbours(concepts)
    assert_instance_of Hash, @texterra.neighbours(concepts, **traverse)
  end
end
# neighbours_size must return a Hash for a single concept and for a concept list,
# each queried first without and then with the full set of traverse parameters
def test_neignbours_size
  traverse = { linkType: 'RELATED', nodeType: 'REGULAR', minDepth: 1, maxDepth: 3 }

  ['12:enwiki', ['12:enwiki', '713:enwiki']].each do |concepts|
    assert_instance_of Hash, @texterra.neighbours_size(concepts)
    assert_instance_of Hash, @texterra.neighbours_size(concepts, **traverse)
  end
end
# similarity_graph must return a Hash with the default and with an explicit 'MIN' link weight
def test_similarity_graph
  concepts = ['12:enwiki', '13137:enwiki', '156327:enwiki']

  assert_instance_of Hash, @texterra.similarity_graph(concepts)
  assert_instance_of Hash, @texterra.similarity_graph(concepts, 'MIN')
end
# all_pairs_similarity must return a Hash with the default and with an explicit 'MIN' link weight
def test_all_pairs_similarity
  first = ['12:enwiki', '13137:enwiki']
  second = ['156327:enwiki', '15942292:enwiki', '1921431:enwiki']

  assert_instance_of Hash, @texterra.all_pairs_similarity(first, second)
  assert_instance_of Hash, @texterra.all_pairs_similarity(first, second, 'MIN')
end
# similarity_to_virtual_article must return a Hash with the default and with an explicit 'MIN' link weight
def test_similarity_to_virtual_article
  concepts = ['12:enwiki', '13137:enwiki']
  virtual_article = ['156327:enwiki', '15942292:enwiki', '1921431:enwiki']

  assert_instance_of Hash, @texterra.similarity_to_virtual_article(concepts, virtual_article)
  assert_instance_of Hash, @texterra.similarity_to_virtual_article(concepts, virtual_article, 'MIN')
end
# similarity_between_virtual_articles must return a Hash with the default and with an explicit 'MIN' link weight
def test_similarity_between_virtual_articles
  first_article = ['12:enwiki', '13137:enwiki']
  second_article = ['156327:enwiki', '15942292:enwiki', '1921431:enwiki']

  assert_instance_of Hash, @texterra.similarity_between_virtual_articles(first_article, second_article)
  assert_instance_of Hash, @texterra.similarity_between_virtual_articles(first_article, second_article, 'MIN')
end
# similar_over_first_neighbours must return a Hash with default params and with
# explicit linkWeight/offset/limit
def test_similar_over_first_neighbours
  concept = '12:enwiki'
  tuned = { linkWeight: 'MIN', offset: 1, limit: 3 }

  assert_instance_of Hash, @texterra.similar_over_first_neighbours(concept)
  assert_instance_of Hash, @texterra.similar_over_first_neighbours(concept, **tuned)
end
# similar_over_filtered_neighbours must return a Hash with default params and with
# explicit linkWeight/offset/limit/among
def test_similar_over_filtered_neighbours
  concept = '12:enwiki'
  tuned = { linkWeight: 'MIN', offset: 1, limit: 3, among: 'PORTION(0.2)' }

  assert_instance_of Hash, @texterra.similar_over_filtered_neighbours(concept)
  assert_instance_of Hash, @texterra.similar_over_filtered_neighbours(concept, **tuned)
end
# get_attributes must return a Hash for a single concept and a concept list,
# both without and with an explicit attribute selection
def test_get_attributes
  single = '12:enwiki'
  pair = ['12:enwiki', '13137:enwiki']

  assert_instance_of Hash, @texterra.get_attributes(single)
  assert_instance_of Hash, @texterra.get_attributes(pair)
  assert_instance_of Hash, @texterra.get_attributes(single, ['url(en)', 'type'])
  assert_instance_of Hash, @texterra.get_attributes(pair, ['url(en)', 'title'])
end
end
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment