shimatta-kenkyusho/shimatta_kenkyusho/shimatta_modules/ShimattaSearchLanguage.py
stefan bbcbf6ab3d added basic search lexer and parser enabling search for parameters and attributes - could be extended
the error handling is very basic, but effective
some missing features like ordering
...and maybe a potential security issue, as the search allows filtering on any attribute somehow related to the component model
2025-02-03 22:18:55 +01:00

185 lines
5.8 KiB
Python

import re
from django.db.models import Q
from sly import Lexer, Parser
from shimatta_modules.EngineeringNumberConverter import EngineeringNumberConverter
class ShimattaSearchConstants():
    '''
    Conversions and regular expressions shared by the search lexer.

    PREFIX_DICT     maps every non-empty prefix of the EngineeringNumberConverter
                    (regular and IT prefixes) to its associated value.
    PREFIX_RULE     optional capturing group matching exactly one of those prefixes.
    TEXTUAL_REGEX   bare word (group 1) or double-quoted text (group 2).
    VALUE_REGEX     optionally signed decimal number (group 1) followed by an
                    optional prefix (group 2).
    '''
    # convert the prefixes from the engineering number converter to be used in the lexer
    PREFIX_DICT = {k: v for k, v in EngineeringNumberConverter.prefixes if k}
    PREFIX_DICT.update({k: v for k, v in EngineeringNumberConverter.it_prefixes if k})

    # Regex alternation takes the first alternative that matches, so the longest
    # prefixes have to be listed first - otherwise a prefix that is the beginning of
    # a longer one (e.g. 'M' and 'Mi', should both be present) would shadow it.
    # The prefixes are escaped so that they are always matched literally.
    PREFIX_RULE = r'(' + r'|'.join(
        [rf'(?:{re.escape(p)})' for p in sorted(PREFIX_DICT, key=len, reverse=True)]
    ) + r')?'

    TEXTUAL_REGEX = r'(?:([^"\s]+))|(?:"([^"]*)")'
    VALUE_REGEX = rf'(\-?\d+(?:\.\d+)?){PREFIX_RULE}'

    # pre-compiled variants used by the lexer to pick the groups out of a token
    TEXTUAL_PATTERN = re.compile(TEXTUAL_REGEX)
    VALUE_PATTERN = re.compile(VALUE_REGEX)
class ShimattaSearchLexer(Lexer):
    '''
    Simple lexer to tokenize a search string.

    Problems found while tokenizing are collected as strings in ``self.errors``
    instead of being raised.
    '''
    tokens = {GT, LT, GTE, LTE, EQ, NEQ, AND, OR, LPAREN, RPAREN, NUMBER, TEXTUAL}

    # ignore whitespace only characters as well as newlines
    ignore = ' \t\n'

    # Regular expression rules for simple tokens.
    # The lexer tries the patterns in the order they are defined, therefore the
    # two character operators have to be listed before their one character
    # counterparts - otherwise '>=' would be split into GT and a stray '='.
    GTE = r'>='
    LTE = r'<='
    EQ = r'=='
    NEQ = r'!='
    GT = r'>'
    LT = r'<'
    AND = r'&'
    OR = r'\|'
    LPAREN = r'\('
    RPAREN = r'\)'

    def __init__(self):
        self.errors = []
        super().__init__()

    @_(ShimattaSearchConstants.VALUE_REGEX)
    def NUMBER(self, t):
        '''
        Parse numbers with engineering unit prefixes.

        The token value is replaced by a float; if a prefix is present the number
        is multiplied with the value stored for that prefix.
        '''
        match = ShimattaSearchConstants.VALUE_PATTERN.match(t.value)
        t.value = float(match.group(1))
        prefix = match.group(2)
        if prefix:
            t.value *= ShimattaSearchConstants.PREFIX_DICT[prefix]
        return t

    @_(ShimattaSearchConstants.TEXTUAL_REGEX)
    def TEXTUAL(self, t):
        '''
        Parse bare words and double-quoted texts.

        The token value is replaced by the text itself, the quotation marks of a
        quoted text are stripped.
        '''
        match = ShimattaSearchConstants.TEXTUAL_PATTERN.match(t.value)
        # group 2 is only set for quoted texts; compare against None so that an
        # empty quoted text ("") results in an empty string instead of None
        quoted = match.group(2)
        t.value = quoted if quoted is not None else match.group(1)
        return t

    def error(self, t):
        '''
        Record the offending character and continue behind it.
        '''
        # t.value holds the complete remaining input, only its first character
        # is the one that could not be matched
        self.errors.append(f'Line {self.lineno}: Bad character {t.value[0]}')
        self.index += 1
class ShimattaSearchParser(Parser):
    '''
    Parser turning the token stream of the ShimattaSearchLexer into a Django Q object.

    Supported grammar:
        expression : textual (GT|LT|GTE|LTE|EQ|NEQ) number
                   | textual (EQ|NEQ) textual
                   | LPAREN expression RPAREN
                   | expression AND expression
                   | expression OR expression

    Syntax errors are collected as strings in ``self.errors`` instead of being raised.
    '''
    # Get the token list from the lexer (required)
    tokens = ShimattaSearchLexer.tokens

    # Make the start symbol explicit instead of relying on the rule order
    start = 'expression'

    # Listed from lowest to highest precedence: AND binds tighter than OR and both
    # are left associative. Without this declaration the grammar is ambiguous.
    precedence = (
        ('left', 'OR'),
        ('left', 'AND'),
    )

    def __init__(self):
        self.errors = []
        super().__init__()

    @staticmethod
    def _get_filter(key, value, compare_suffix='', invert=False):
        '''
        Assemble a filter to grep data from the relational database structure.

        key             field name to filter for; a leading ``param_`` selects a
                        parameter with the remaining name instead of a field
        value           value to compare with (str or number)
        compare_suffix  lookup suffix appended to the field name (e.g. ``__gt``)
        invert          negate the assembled filter

        Returns the assembled Q object.
        '''
        # filter for params - stored in two separate tables
        if key.startswith('param_'):
            parameter_name = key[len('param_'):]
            # textual values and numeric values are stored in different fields
            value_field = 'text_value' if isinstance(value, str) else 'value'

            def relation_filter(relation):
                # Name and value have to match on the SAME relation. Combining
                # "name on either relation" with "value on either relation" would
                # also accept a name found on one relation together with a value
                # found on the other one.
                return Q(**{
                    f'{relation}__parameter_type__parameter_name': parameter_name,
                    f'{relation}__{value_field}{compare_suffix}': value,
                })

            query = relation_filter('componentparameter') | \
                    relation_filter('package__packageparameter')
        # filter for direct attributes - or whatever the user throws into the search input
        else:
            # NOTE(review): the key comes straight from the search input and is used
            # as field lookup without any validation - consider restricting it to a
            # list of allowed fields.
            query = Q(**{f'{key}{compare_suffix}': value})

        if invert:
            query = ~query
        return Q(query)

    # ruleset
    @_('expression : textual GT number')
    def expression(self, p):
        return self._get_filter(p.textual.strip(), p.number, '__gt', False)

    @_('expression : textual LT number')
    def expression(self, p):
        return self._get_filter(p.textual.strip(), p.number, '__lt', False)

    @_('expression : textual GTE number')
    def expression(self, p):
        return self._get_filter(p.textual.strip(), p.number, '__gte', False)

    @_('expression : textual LTE number')
    def expression(self, p):
        return self._get_filter(p.textual.strip(), p.number, '__lte', False)

    @_('expression : textual EQ number')
    def expression(self, p):
        return self._get_filter(p.textual.strip(), p.number, '', False)

    @_('expression : textual NEQ number')
    def expression(self, p):
        return self._get_filter(p.textual.strip(), p.number, '', True)

    @_('expression : textual EQ textual')
    def expression(self, p):
        return self._get_filter(p.textual0.strip(), p.textual1, '', False)

    @_('expression : textual NEQ textual')
    def expression(self, p):
        return self._get_filter(p.textual0.strip(), p.textual1, '', True)

    @_('TEXTUAL')
    def textual(self, p):
        return p.TEXTUAL

    @_('NUMBER')
    def number(self, p):
        return p.NUMBER

    @_('expression : LPAREN expression RPAREN')
    def expression(self, p):
        return Q(p.expression)

    @_('expression : expression AND expression')
    def expression(self, p):
        return p.expression0 & p.expression1

    @_('expression : expression OR expression')
    def expression(self, p):
        return p.expression0 | p.expression1

    # The former rule "expression : expression" has been dropped: it only passed
    # its value through and, being a unit cycle, made the grammar ambiguous.

    # Error rule for syntax errors
    def error(self, p):
        '''
        Record a syntax error; p is the offending token or None at the end of the input.
        '''
        if p is None:
            self.errors.append('Syntax error: unexpected end of input!')
        else:
            self.errors.append(f'Syntax error in input {p}!')
class ShimattaSearchLanguage():
    '''
    Front end of the search language: tokenizes and parses a search string.
    '''

    def __init__(self):
        self.lexer = ShimattaSearchLexer()
        self.parser = ShimattaSearchParser()

    def search_for_components(self, search_string):
        '''
        Convert a search string into a query.

        search_string   the text entered by the user

        Returns a tuple of the parser result and the list of lexer errors followed
        by the parser errors that occurred for this search string.
        '''
        # The lexer and the parser are reused between calls - drop the errors of
        # previous searches, otherwise they would be reported over and over again.
        self.lexer.errors.clear()
        self.parser.errors.clear()

        # tokenizing happens lazily while parsing, therefore the lexer errors are
        # complete only after the parser is done
        query = self.parser.parse(self.lexer.tokenize(search_string))
        return query, self.lexer.errors + self.parser.errors