// C API wrapper for RAGAnalyzer
// This file provides a C-compatible interface for CGO to call.
//
// Ownership summary (as stated on the individual declarations below):
//   - char* results of the RAGAnalyzer_* functions are released with free().
//   - RAGTokenList* results are released with RAGAnalyzer_FreeTokenList().
//   - char* results of the ThincNER_* functions are released with
//     ThincNER_FreeString().

#ifndef RAG_ANALYZER_C_API_H
#define RAG_ANALYZER_C_API_H

// Standard headers are included before the extern "C" block so that they are
// never wrapped in a C linkage specification when this file is used from C++.
#include <stdbool.h>  // bool
#include <stdint.h>   // uint32_t, int32_t

#ifdef __cplusplus
extern "C" {
#endif

// Opaque pointer to RAGAnalyzer
typedef void* RAGAnalyzerHandle;

// Callback function type for receiving tokens.
// Invoked by RAGAnalyzer_Analyze for each token.
//   text:       token text
//   len:        length of the token text
//               NOTE(review): presumably a byte count, and text may not be
//               NUL-terminated -- confirm against the implementation.
//   offset:     start offset of the token
//   end_offset: end offset of the token
//               NOTE(review): presumably byte offsets into the analyzed text,
//               matching RAGTokenWithPosition -- confirm.
typedef void (*RAGTokenCallback)(
    const char* text,
    uint32_t len,
    uint32_t offset,
    uint32_t end_offset
);

// Create a new RAGAnalyzer instance
//   path: path to dictionary files
// Returns: handle to the analyzer, or NULL on failure
RAGAnalyzerHandle RAGAnalyzer_Create(const char* path);

// Destroy a RAGAnalyzer instance
void RAGAnalyzer_Destroy(RAGAnalyzerHandle handle);

// Load the analyzer (must be called before Analyze)
// Returns: 0 on success, negative value on failure
int RAGAnalyzer_Load(RAGAnalyzerHandle handle);

// Set fine-grained mode
void RAGAnalyzer_SetFineGrained(RAGAnalyzerHandle handle, bool fine_grained);

// Set enable position tracking
void RAGAnalyzer_SetEnablePosition(RAGAnalyzerHandle handle, bool enable_position);

// Analyze text and call callback for each token
// Returns: 0 on success, negative value on failure
int RAGAnalyzer_Analyze(
    RAGAnalyzerHandle handle,
    const char* text,
    RAGTokenCallback callback
);

// Simple analyze that returns tokens as a single space-separated string
// Caller is responsible for freeing the returned string
// Returns: dynamically allocated string (must call free()), or NULL on failure
char* RAGAnalyzer_Tokenize(RAGAnalyzerHandle handle, const char* text);

// Structure for a token with position information
typedef struct {
    char* text;           // Token text (must be freed with free())
    uint32_t offset;      // Byte offset of the token in the original text
    uint32_t end_offset;  // Byte end offset of the token
} RAGTokenWithPosition;

// Helper functions to access token fields (for CGO)
// NOTE(review): token is presumably a pointer to a RAGTokenWithPosition passed
// as void* for CGO convenience -- confirm against the implementation.
const char* RAGToken_GetText(void* token);
uint32_t RAGToken_GetOffset(void* token);
uint32_t RAGToken_GetEndOffset(void* token);

// Structure for a list of tokens with positions
typedef struct {
    RAGTokenWithPosition* tokens;  // Array of tokens (must be freed with RAGAnalyzer_FreeTokenList)
    uint32_t count;                // Number of tokens in the list
} RAGTokenList;

// Tokenize with position information
// Caller is responsible for freeing the returned token list with RAGAnalyzer_FreeTokenList
// Returns: dynamically allocated token list (must call RAGAnalyzer_FreeTokenList), or NULL on failure
RAGTokenList* RAGAnalyzer_TokenizeWithPosition(RAGAnalyzerHandle handle, const char* text);

// Free a token list allocated by RAGAnalyzer_TokenizeWithPosition
// NOTE(review): whether this also releases each token's text is not visible
// from this header -- confirm before freeing token text separately.
void RAGAnalyzer_FreeTokenList(RAGTokenList* token_list);

// Fine-grained tokenize: takes space-separated tokens and returns fine-grained
// tokens as space-separated string
// Caller is responsible for freeing the returned string
// Returns: dynamically allocated string (must call free()), or NULL on failure
char* RAGAnalyzer_FineGrainedTokenize(RAGAnalyzerHandle handle, const char* tokens);

// Get the frequency of a term (matching Python rag_tokenizer.freq)
// Returns: frequency value, or 0 if term not found
int32_t RAGAnalyzer_GetTermFreq(RAGAnalyzerHandle handle, const char* term);

// Get the POS tag of a term (matching Python rag_tokenizer.tag)
// Caller is responsible for freeing the returned string
// Returns: dynamically allocated string (must call free()), or NULL if term
//          not found or no tag
char* RAGAnalyzer_GetTermTag(RAGAnalyzerHandle handle, const char* term);

// Copy an existing RAGAnalyzer instance to create a new independent instance
// This is useful for creating per-request analyzer instances in multi-threaded
// environments
// The new instance shares the loaded dictionaries with the original but has
// independent internal state
// Returns: handle to the new analyzer instance, or NULL on failure
RAGAnalyzerHandle RAGAnalyzer_Copy(RAGAnalyzerHandle handle);

// ---------------------------------------------------------------------------
// Named Entity Recognition (spaCy model inference)
// ---------------------------------------------------------------------------
// The ThincNER functions reuse the RAGAnalyzerHandle typedef for their handles.
// Pass only handles obtained from ThincNER_Create to ThincNER_* functions.

// Create a ThincNER inference handle.
//   model_ner_dir:   path to the spaCy model's ner/ component directory
//   model_vocab_dir: path to the spaCy model's vocab/ directory (optional, can be NULL)
// Returns: handle, or NULL on failure
RAGAnalyzerHandle ThincNER_Create(const char* model_ner_dir, const char* model_vocab_dir);

// Destroy a ThincNER handle.
void ThincNER_Destroy(RAGAnalyzerHandle handle);

// Run NER on pre-tokenized text.
//   tokens_json: JSON array e.g. ["Apple","Inc.","was","founded","by","Steve","Jobs","."]
// Returns JSON array of entities, caller must free with ThincNER_FreeString.
char* ThincNER_Predict(RAGAnalyzerHandle handle, const char* tokens_json);

// Tokenize text using spaCy-compatible rules.
// NOTE(review): the accepted values of lang are not documented here
// (presumably a language code) -- confirm against the implementation.
// Returns JSON array of token strings, caller must free with ThincNER_FreeString.
char* ThincNER_Tokenize(const char* text, const char* lang);

// Free a string returned by ThincNER_Predict or ThincNER_Tokenize.
void ThincNER_FreeString(char* ptr);

#ifdef __cplusplus
}
#endif

#endif // RAG_ANALYZER_C_API_H