Topic Identifier API for Python#

class eot.wowool.topic_identifier.TopicIdentifier#

The model contains information about topic candidates and the number of documents in the collection in which each candidate appears.

__init__(language: str, count: int = 5, threshold: int = 0, topic_model: str = '', domains: Optional[List[Union[str, eot.wowool.native.core.domain.Domain]]] = None, ignore_entities: bool = False, engine: Optional[eot.wowool.native.core.engine.Engine] = None)#
Parameters
  • language (str) – Language to process the input document.

  • count (int) – The number of topics to be returned. default = 5

  • threshold (int) – The lower threshold as a percentage, in the range [0-100]. default = 0

  • topic_model (str) – The reference file created with create_topic_model.

  • domains (list[Union[str, Domain]]) – List of domains you want to process before generating topics.

  • engine (eot.wowool.native.core.Engine) – The engine that will cache the domains and models.

topic_init.py#
from eot.wowool.native.core import Language
from eot.wowool.topic_identifier.topic_identifier import TopicIdentifier
from eot.test.corpus import Corpus
from eot.wowool.document import Document

# Build the language analyzer and the topic identifier (five topics requested).
english = Language("english")
topic_it = TopicIdentifier("english", count=5)
# The corpus is the movie folder, which holds all the movie files.
corpus = Corpus("english/movies")

# First pass: hand every analyzed corpus file to the topic identifier.
# Note: this pass is not strictly necessary.
print("Adding the corpus files to the model for better results.")
for entry in corpus:
    print(f"adding: {entry.id}")
    analyzed = english(Document(entry))
    topic_it.add(analyzed)

# Second pass: analyze each file again and print the topics found for it.
print("Topics/file")
for entry in corpus:
    annotated = topic_it(english(Document(entry)))
    found_topics = annotated.results("eot_topics")
    print(f"# {annotated.id}")
    for found in found_topics:
        print(f" - {found}")
__call__(document: eot.wowool.document.document.Document, model: Optional[eot.wowool.topic_identifier.topic_identifier.Model] = None) → eot.wowool.document.document.Document#

Adds topics to the given Document object and returns a Document.

Parameters

document (eot.wowool.Document) – The Document object we want to add the topics to.

topic_call.py#
from eot.wowool.native.core import Language
from eot.wowool.topic_identifier import TopicIdentifier

english = Language("english")
number_of_topics = 5
topic_it = TopicIdentifier(language="english", count=number_of_topics)
# Analyze a single sentence, run the topic identifier on it, and print each
# topic stored under the 'eot_topics' results.
analyzed = english("This is the effect of the green house gases")
document = topic_it(analyzed)
topics = document.results("eot_topics")
for topic in topics:
    print(f" - {topic}")
class eot.wowool.topic_identifier.topic_identifier.Model#
__init__(basic_model, ignore_entities)#