perf: optimize regex compilation in entity extraction
Move regular expression compilation to the module level in `dialect.py` to prevent repeated parsing during loop execution. Co-authored-by: igorls <4753812+igorls@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,14 @@
|
||||
import pytest
|
||||
import timeit
|
||||
import re
|
||||
|
||||
from mempalace.dialect import Dialect
|
||||
|
||||
def test_detect_entities_benchmark():
|
||||
dialect = Dialect()
|
||||
text = "Alice went to the market and met Bob who is a nice guy. They both discussed about Dr. Chen and how he solved the big issue. Another sentence with Name and Name2 and SomeName"
|
||||
|
||||
# Run the function multiple times to measure the performance
|
||||
number = 10000
|
||||
time = timeit.timeit(lambda: dialect._detect_entities_in_text(text), number=number)
|
||||
print(f"\nDialect._detect_entities_in_text benchmark: {time:.4f} seconds for {number} iterations")
|
||||
Reference in New Issue
Block a user