Elasticsearch Search Engine Development

Comprehensive guide for building search engines with Elasticsearch, including indexing strategies, query optimization, and analytics implementation.

# Elasticsearch Search Engine Development

## 1. Elasticsearch Setup and Configuration

### Basic Cluster Configuration
```yaml
# docker-compose.yml
# Local development stack: one Elasticsearch node plus a Kibana instance pointed at it.
version: '3.8'
services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.10.0
    environment:
      - cluster.name=search-cluster
      - node.name=search-node-1
      # Ask Elasticsearch to lock its memory; paired with the unlimited memlock ulimit below.
      - bootstrap.memory_lock=true
      # Fixed 2 GB JVM heap (minimum and maximum are set to the same value).
      - "ES_JAVA_OPTS=-Xms2g -Xmx2g"
      # Security features are switched off, so port 9200 is served without authentication.
      # NOTE(review): suitable for a local sandbox only — confirm before reusing this file elsewhere.
      - xpack.security.enabled=false
      # Run as a standalone node instead of discovering and joining other nodes.
      - discovery.type=single-node
    ulimits:
      # -1 removes the locked-memory limit so the memory_lock setting above can take effect.
      memlock:
        soft: -1
        hard: -1
    volumes:
      # Named volume keeps index data across container restarts.
      - esdata:/usr/share/elasticsearch/data
    ports:
      # 9200 is the HTTP port the client code connects to; 9300 is also published.
      # NOTE(review): 9300 is presumably the transport port and may not be needed on the host — confirm.
      - "9200:9200"
      - "9300:9300"

  kibana:
    # Kibana image tag matches the Elasticsearch tag above (8.10.0).
    image: docker.elastic.co/kibana/kibana:8.10.0
    ports:
      - "5601:5601"
    environment:
      # Reaches Elasticsearch through the compose service name.
      ELASTICSEARCH_HOSTS: http://elasticsearch:9200
    depends_on:
      - elasticsearch

volumes:
  esdata:
    driver: local
```

### Production Configuration
```yaml
# elasticsearch.yml
# Node configuration for a three-node cluster (node-1, node-2, node-3).
cluster.name: production-search
# Node name is taken from the HOSTNAME environment variable.
node.name: ${HOSTNAME}
path.data: /var/lib/elasticsearch
path.logs: /var/log/elasticsearch
# Binds to every network interface.
# NOTE(review): no security/TLS settings appear in this file — confirm they are supplied elsewhere.
network.host: 0.0.0.0
http.port: 9200
# Hosts contacted to discover the rest of the cluster.
discovery.seed_hosts: ["node-1", "node-2", "node-3"]
# Master-eligible nodes used to bootstrap the cluster.
# NOTE(review): presumably only needed for the very first cluster start — confirm whether it should be removed afterwards.
cluster.initial_master_nodes: ["node-1", "node-2", "node-3"]
bootstrap.memory_lock: true
# Portion of heap reserved for the indexing buffer.
# NOTE(review): 30% looks tuned for write-heavy workloads — confirm against the documented default.
indices.memory.index_buffer_size: 30%
# Maximum number of search requests that may wait in the search thread pool queue.
# NOTE(review): a very deep queue can hide overload as latency — confirm this value is intentional.
thread_pool.search.queue_size: 10000
```

## 2. Index Design and Mapping

### Document Mapping Strategy
```typescript
/**
 * Application-side shape of a product stored in the search index.
 * Every field except `id` has a matching entry in `productMapping.mappings.properties`.
 */
interface ProductDocument {
  /** Product identifier; `SearchService.bulkIndex` uses it as the document `_id`. */
  id: string;
  /** Product title; mapped as analyzed text with `keyword` and `suggest` sub-fields. */
  title: string;
  /** Long-form description; mapped as analyzed text. */
  description: string;
  /** Category; mapped as `keyword` with an analyzed `text` sub-field. */
  category: string;
  /** Price; mapped as `float`. */
  price: number;
  /** Tags; mapped as `keyword`. */
  tags: string[];
  /** Creation time; mapped as `date`. */
  created_at: Date;
  /** Last-modification time; mapped as `date`. */
  updated_at: Date;
  /** Availability flag; mapped as `boolean`. */
  availability: boolean;
  /** Brand name; mapped as `keyword`. */
  brand: string;
  /** Rating; mapped as `float`. */
  rating: number;
  /** Number of reviews; mapped as `integer`. */
  review_count: number;
}

/**
 * Index definition (mappings + settings) for the product index.
 *
 * - Free-text fields (`title`, `description`) use the custom `english` analyzer
 *   declared in `settings.analysis`.
 * - Fields used for exact filtering, sorting and aggregations are `keyword`,
 *   numeric, boolean or date types.
 * - `title.suggest` is a `completion` sub-field that backs prefix auto-complete.
 */
const productMapping = {
  mappings: {
    properties: {
      title: {
        type: 'text',
        analyzer: 'english',
        fields: {
          // Exact-value copy of the title for sorting/aggregations.
          keyword: { type: 'keyword' },
          // Completion suggester input for auto-complete.
          suggest: {
            type: 'completion',
            analyzer: 'simple'
          }
        }
      },
      description: {
        type: 'text',
        analyzer: 'english'
      },
      category: {
        type: 'keyword',
        fields: {
          // Analyzed copy so categories can also be matched by full-text queries.
          text: { type: 'text', analyzer: 'standard' }
        }
      },
      price: { type: 'float' },
      tags: { type: 'keyword' },
      created_at: { type: 'date' },
      updated_at: { type: 'date' },
      availability: { type: 'boolean' },
      brand: { type: 'keyword' },
      rating: { type: 'float' },
      review_count: { type: 'integer' }
    }
  },
  settings: {
    number_of_shards: 3,
    number_of_replicas: 1,
    analysis: {
      analyzer: {
        english: {
          tokenizer: 'standard',
          // Order matters: stop words must be removed BEFORE stemming.
          // If the stemmer runs first, stop words are rewritten (e.g. "this" -> "thi")
          // and no longer match the stop list, so they end up in the index.
          filter: ['lowercase', 'english_stop', 'english_stemmer']
        }
      },
      filter: {
        english_stemmer: {
          type: 'stemmer',
          language: 'english'
        },
        english_stop: {
          type: 'stop',
          stopwords: '_english_'
        }
      }
    }
  }
};
```

## 3. Search Client Implementation

### TypeScript Client Setup
```typescript
import { Client } from '@elastic/elasticsearch';

/**
 * Thin wrapper around the Elasticsearch client that covers index creation and
 * single/bulk document indexing.
 *
 * Connection settings come from the environment:
 * - `ELASTICSEARCH_URL` (defaults to `http://localhost:9200`)
 * - `ES_USERNAME` / `ES_PASSWORD` (basic auth, only used when both are set)
 */
export class SearchService {
  /**
   * Underlying Elasticsearch client.
   *
   * Exposed read-only rather than `private` because subclasses
   * (`AdvancedSearch`, `AutoCompleteService`) and collaborators
   * (`SearchAnalytics`, `RealTimeIndexer`, the test suite) issue requests
   * through it directly.
   */
  public readonly client: Client;

  constructor() {
    const username = process.env.ES_USERNAME;
    const password = process.env.ES_PASSWORD;

    this.client = new Client({
      node: process.env.ELASTICSEARCH_URL || 'http://localhost:9200',
      // Only send credentials when both are configured; an auth object holding
      // undefined values is invalid against a security-disabled cluster.
      ...(username && password ? { auth: { username, password } } : {}),
      maxRetries: 3,
      requestTimeout: 30000,
      sniffOnStart: true
    });
  }

  /**
   * Creates `indexName` with the given mappings/settings unless it already exists.
   *
   * @param indexName - Name of the index to create.
   * @param mapping - Index body (`mappings` and `settings`).
   * @throws Re-throws any client error after logging it.
   */
  async createIndex(indexName: string, mapping: any): Promise<void> {
    try {
      const exists = await this.client.indices.exists({ index: indexName });

      if (!exists) {
        await this.client.indices.create({
          index: indexName,
          body: mapping
        });
        console.log(`Index ${indexName} created successfully`);
      }
    } catch (error) {
      console.error('Error creating index:', error);
      throw error;
    }
  }

  /**
   * Indexes a single document and waits until it is visible to search.
   *
   * @param indexName - Target index.
   * @param document - Document source.
   * @param id - Optional document id; Elasticsearch generates one when omitted.
   * @throws Re-throws any client error after logging it.
   */
  async indexDocument(
    indexName: string,
    document: any,
    id?: string
  ): Promise<void> {
    try {
      await this.client.index({
        index: indexName,
        id,
        body: document,
        refresh: 'wait_for'
      });
    } catch (error) {
      console.error('Error indexing document:', error);
      throw error;
    }
  }

  /**
   * Indexes many documents in one bulk request, using each document's `id`
   * as its `_id`, and waits until they are visible to search.
   *
   * @param indexName - Target index.
   * @param documents - Documents to index; an empty array is a no-op.
   * @throws Re-throws client errors, and throws when Elasticsearch reports
   *   that one or more items were rejected, so partial failures are not
   *   mistaken for success.
   */
  async bulkIndex(indexName: string, documents: any[]): Promise<void> {
    // A bulk request with an empty body is rejected by Elasticsearch.
    if (documents.length === 0) {
      return;
    }

    const body = documents.flatMap(doc => [
      { index: { _index: indexName, _id: doc.id } },
      doc
    ]);

    try {
      const response = await this.client.bulk({
        refresh: 'wait_for',
        body
      });

      if (response.errors) {
        const erroredDocuments = response.items.filter(item =>
          item.index?.error
        );
        console.error('Bulk indexing errors:', erroredDocuments);
        throw new Error(
          `Bulk indexing into ${indexName} failed for ` +
            `${erroredDocuments.length} of ${documents.length} documents`
        );
      }
    } catch (error) {
      console.error('Error bulk indexing:', error);
      throw error;
    }
  }
}
```

## 4. Advanced Search Queries

### Multi-Field Search with Boosting
```typescript
/**
 * Product search with full-text matching, structured filters, facets
 * (aggregations), highlighting, sorting and pagination.
 */
export class AdvancedSearch extends SearchService {
  /** Sort clauses for every `sortBy` value other than `'relevance'`. */
  private static readonly SORT_CLAUSES = {
    price: [{ price: { order: 'asc' } }],
    rating: [{ rating: { order: 'desc' } }],
    date: [{ created_at: { order: 'desc' } }]
  };

  /**
   * Runs a product search.
   *
   * @param query - Search options.
   *   - `q`: free-text query; blank or omitted matches all documents.
   *   - `category`, `tags`, `availability`, `priceRange`: exact filters (non-scoring).
   *   - `sortBy`: `'relevance'` (default) keeps score ordering.
   *   - `page`: 1-based page number (default 1); values below 1 are treated as 1.
   *   - `size`: page size (default 20); negative values are treated as 0.
   *   - `index`: index or alias to search (default `'products'`).
   * @returns Hits, total hit count, aggregations and the time taken.
   * @throws Re-throws any client error after logging it.
   */
  async searchProducts(query: {
    q?: string;
    category?: string;
    priceRange?: { min: number; max: number };
    tags?: string[];
    availability?: boolean;
    sortBy?: 'relevance' | 'price' | 'rating' | 'date';
    page?: number;
    size?: number;
    index?: string;
  }): Promise<SearchResult> {
    const {
      q = '',
      category,
      priceRange,
      tags = [],
      availability,
      sortBy = 'relevance',
      page = 1,
      size = 20,
      index = 'products'
    } = query;

    // Guard pagination: a page below 1 or a negative/NaN size would produce an
    // invalid `from`/`size` that Elasticsearch rejects.
    const pageSize = Number.isFinite(size) ? Math.max(0, Math.trunc(size)) : 20;
    const pageNumber = Number.isFinite(page) ? Math.max(1, Math.trunc(page)) : 1;

    const searchBody: any = {
      query: {
        bool: {
          must: [],
          filter: [],
          should: [],
          minimum_should_match: 0
        }
      },
      highlight: {
        fields: {
          title: {},
          description: {}
        }
      },
      aggs: {
        categories: {
          terms: { field: 'category', size: 20 }
        },
        price_ranges: {
          range: {
            field: 'price',
            ranges: [
              { to: 50 },
              { from: 50, to: 100 },
              { from: 100, to: 200 },
              { from: 200 }
            ]
          }
        },
        brands: {
          terms: { field: 'brand', size: 10 }
        }
      },
      from: (pageNumber - 1) * pageSize,
      size: pageSize
    };

    // Main search query. A whitespace-only string is treated like no query.
    const text = q.trim();
    if (text) {
      searchBody.query.bool.must.push({
        multi_match: {
          query: text,
          // Title matches weigh most, then tags/brand, then description.
          fields: [
            'title^3',
            'description^1',
            'tags^2',
            'brand^2'
          ],
          type: 'best_fields',
          fuzziness: 'AUTO'
        }
      });
    } else {
      searchBody.query.bool.must.push({ match_all: {} });
    }

    // Filters run in filter context: they do not affect scoring.
    if (category) {
      searchBody.query.bool.filter.push({
        term: { category }
      });
    }

    if (priceRange) {
      searchBody.query.bool.filter.push({
        range: {
          price: {
            gte: priceRange.min,
            lte: priceRange.max
          }
        }
      });
    }

    if (tags.length > 0) {
      searchBody.query.bool.filter.push({
        terms: { tags }
      });
    }

    // `false` is a meaningful filter value, so compare against undefined.
    if (availability !== undefined) {
      searchBody.query.bool.filter.push({
        term: { availability }
      });
    }

    // Sorting; relevance keeps the default score ordering.
    if (sortBy !== 'relevance') {
      searchBody.sort = AdvancedSearch.SORT_CLAUSES[sortBy];
    }

    try {
      const response = await this.client.search({
        index,
        body: searchBody
      });

      return this.formatSearchResponse(response);
    } catch (error) {
      console.error('Search error:', error);
      throw error;
    }
  }

  /**
   * Flattens a raw search response into the `SearchResult` shape.
   *
   * `hits.total` may be an object (`{ value, relation }`), a plain number, or
   * absent depending on request options, so all three forms are handled.
   */
  private formatSearchResponse(response: any): SearchResult {
    const rawTotal = response.hits.total;
    const total =
      typeof rawTotal === 'number' ? rawTotal : rawTotal?.value ?? 0;

    return {
      hits: response.hits.hits.map((hit: any) => ({
        id: hit._id,
        score: hit._score,
        source: hit._source,
        highlight: hit.highlight
      })),
      total,
      aggregations: response.aggregations,
      took: response.took
    };
  }
}
```

## 5. Auto-Complete and Suggestions

### Completion Suggester Implementation
```typescript
/**
 * Auto-complete and spelling-suggestion helpers built on the Elasticsearch
 * suggesters. All methods are best-effort: on failure they log the error and
 * return an empty list rather than throwing.
 */
export class AutoCompleteService extends SearchService {
  /**
   * Prefix completions from the `title.suggest` completion field.
   *
   * @param prefix - Text typed so far; blank input returns no suggestions.
   * @param size - Maximum number of suggestions (default 10).
   * @returns Suggested titles with their scores.
   */
  async getSuggestions(
    prefix: string,
    size: number = 10
  ): Promise<Suggestion[]> {
    if (!prefix.trim()) {
      return [];
    }

    try {
      const entries = await this.runSuggester('title_suggest', {
        prefix,
        completion: {
          field: 'title.suggest',
          size,
          skip_duplicates: true
        }
      });

      return this.collectOptions(entries).map((option: any) => ({
        text: option.text,
        score: option._score
      }));
    } catch (error) {
      console.error('Suggestion error:', error);
      return [];
    }
  }

  /**
   * Per-term spelling corrections.
   *
   * The term suggester returns one entry per token of `text`; options from
   * every token are returned (de-duplicated, in response order), not just
   * those for the first token.
   *
   * @param text - Input text to correct; blank input returns no suggestions.
   * @param field - Field whose terms are used as the dictionary (default `title`).
   */
  async getTermSuggestions(
    text: string,
    field: string = 'title'
  ): Promise<string[]> {
    if (!text.trim()) {
      return [];
    }

    try {
      const entries = await this.runSuggester('term_suggest', {
        text,
        term: {
          field,
          suggest_mode: 'popular',
          sort: 'frequency'
        }
      });

      const candidates = this.collectOptions(entries).map(
        (option: any) => option.text as string
      );
      return [...new Set(candidates)];
    } catch (error) {
      console.error('Term suggestion error:', error);
      return [];
    }
  }

  /**
   * Whole-phrase corrections ("did you mean").
   *
   * @param text - Input phrase; blank input returns no suggestions.
   * @param field - Field used for candidate generation (default `title`).
   */
  async getPhraseSuggestions(
    text: string,
    field: string = 'title'
  ): Promise<string[]> {
    if (!text.trim()) {
      return [];
    }

    try {
      const entries = await this.runSuggester('phrase_suggest', {
        text,
        phrase: {
          field,
          max_errors: 2,
          confidence: 0.5,
          direct_generator: [
            {
              field,
              suggest_mode: 'always',
              max_edits: 2
            }
          ]
        }
      });

      return this.collectOptions(entries).map((option: any) => option.text);
    } catch (error) {
      console.error('Phrase suggestion error:', error);
      return [];
    }
  }

  /**
   * Sends a suggest-only request (no document sources) against the products
   * index and returns the entries for the named suggester, or an empty array
   * when the response carries none.
   */
  private async runSuggester(
    name: string,
    suggester: Record<string, unknown>
  ): Promise<any[]> {
    const response: any = await this.client.search({
      index: 'products',
      body: {
        suggest: { [name]: suggester },
        _source: false
      }
    });

    return response.suggest?.[name] ?? [];
  }

  /** Flattens the `options` arrays of all suggester entries into one list. */
  private collectOptions(entries: any[]): any[] {
    return entries.flatMap((entry: any) => entry.options ?? []);
  }
}
```

## 6. Performance Optimization

### Query Optimization Strategies
```typescript
/**
 * Builders for performance-minded request bodies and index templates.
 */
export class PerformanceOptimizer {
  /** Largest `from + size` a paginated search may request (default result window). */
  private static readonly MAX_RESULT_WINDOW = 10000;
  /** Upper bound on page size. */
  private static readonly MAX_PAGE_SIZE = 100;
  /** Page size used when the caller does not supply a usable one. */
  private static readonly DEFAULT_PAGE_SIZE = 10;

  /**
   * Builds a search request that keeps exact-match and range constraints in
   * filter context, trims the returned `_source`, and bounds pagination.
   *
   * @param searchParams - Search inputs:
   *   - `textQuery`: optional scoring query clause.
   *   - `termFilters` / `rangeFilters`: optional arrays of filter clauses
   *     (missing or non-array values are treated as empty).
   *   - `from`: offset; clamped so that `from + size` never exceeds 10000.
   *   - `size`: page size; clamped to 0..100, defaulting to 10 when absent.
   * @returns A request object with `query`, `_source`, `request_cache`, `from` and `size`.
   */
  static buildOptimizedQuery(searchParams: any) {
    const params = searchParams ?? {};
    const termFilters: any[] = Array.isArray(params.termFilters)
      ? params.termFilters
      : [];
    const rangeFilters: any[] = Array.isArray(params.rangeFilters)
      ? params.rangeFilters
      : [];

    const size = PerformanceOptimizer.clampInt(
      params.size,
      0,
      PerformanceOptimizer.MAX_PAGE_SIZE,
      PerformanceOptimizer.DEFAULT_PAGE_SIZE
    );
    // The window limit applies to from + size, not to `from` alone, so the
    // highest allowed offset depends on the page size.
    const from = PerformanceOptimizer.clampInt(
      params.from,
      0,
      PerformanceOptimizer.MAX_RESULT_WINDOW - size,
      0
    );

    return {
      query: {
        bool: {
          must: params.textQuery ? [params.textQuery] : [],
          // Use filters instead of queries when possible
          filter: [...termFilters, ...rangeFilters]
        }
      },
      // Use source filtering to reduce payload
      _source: {
        includes: ['title', 'price', 'rating', 'availability'],
        excludes: ['description', 'internal_*']
      },
      // Use request cache for repeated searches
      request_cache: true,
      // Limit deep pagination
      from,
      size
    };
  }

  /**
   * Index template for time-based log indices matching `logs-*`.
   *
   * Uses a single shard without replicas, a 30s refresh interval, and attaches
   * the `logs-policy` lifecycle policy with `logs` as the rollover alias.
   * `message` is stored but not indexed (`index: false`).
   */
  static createTimeBasedTemplate() {
    return {
      index_patterns: ['logs-*'],
      template: {
        settings: {
          number_of_shards: 1,
          number_of_replicas: 0,
          refresh_interval: '30s',
          index: {
            lifecycle: {
              name: 'logs-policy',
              rollover_alias: 'logs'
            }
          }
        },
        mappings: {
          properties: {
            '@timestamp': { type: 'date' },
            message: { type: 'text', index: false },
            level: { type: 'keyword' },
            service: { type: 'keyword' }
          }
        }
      }
    };
  }

  /**
   * Converts `value` to an integer within `[min, max]`.
   * Returns `fallback` when the value is missing or not a finite number.
   */
  private static clampInt(
    value: unknown,
    min: number,
    max: number,
    fallback: number
  ): number {
    if (value == null) {
      return fallback;
    }
    const parsed = Number(value);
    if (!Number.isFinite(parsed)) {
      return fallback;
    }
    return Math.min(Math.max(Math.trunc(parsed), min), max);
  }
}
```

## 7. Analytics and Monitoring

### Search Analytics Implementation
```typescript
/**
 * Records search events in the `search_analytics` index and reports on them.
 */
export class SearchAnalytics {
  private searchService: SearchService;

  constructor(searchService: SearchService) {
    this.searchService = searchService;
  }

  /**
   * Stores one search event. The event's `timestamp` is also written as
   * `@timestamp`, which is the field the reporting queries filter on.
   *
   * @param event - The search that was performed and its outcome.
   */
  async trackSearchEvent(event: {
    query: string;
    results_count: number;
    user_id?: string;
    session_id: string;
    timestamp: Date;
    clicked_results?: string[];
  }): Promise<void> {
    await this.searchService.indexDocument('search_analytics', {
      ...event,
      '@timestamp': event.timestamp
    });
  }

  /**
   * Most frequent queries within a time range.
   *
   * @param timeRange - Inclusive `@timestamp` bounds.
   * @param size - Number of buckets to return (default 20).
   * @returns Terms-aggregation buckets ordered by descending count; empty when
   *   the response carries no aggregation.
   */
  async getPopularQueries(
    timeRange: { from: Date; to: Date },
    size: number = 20
  ): Promise<any[]> {
    const response: any = await this.searchService.client.search({
      index: 'search_analytics',
      body: {
        query: {
          bool: {
            // Filter context: no scoring is needed for an aggregation-only request.
            filter: [this.timeRangeClause(timeRange)]
          }
        },
        aggs: {
          popular_queries: {
            terms: {
              field: 'query.keyword',
              size,
              order: { _count: 'desc' }
            }
          }
        },
        // Only the aggregation is needed, not the matching documents.
        size: 0
      }
    });

    return response.aggregations?.popular_queries?.buckets ?? [];
  }

  /**
   * Distinct queries that returned no results within a time range
   * (at most 100).
   *
   * @param timeRange - Inclusive `@timestamp` bounds.
   * @returns Query strings; empty when the response carries no aggregation.
   */
  async getZeroResultQueries(
    timeRange: { from: Date; to: Date }
  ): Promise<string[]> {
    const response: any = await this.searchService.client.search({
      index: 'search_analytics',
      body: {
        query: {
          bool: {
            // Exact-match and range constraints belong in filter context.
            filter: [
              this.timeRangeClause(timeRange),
              {
                term: { results_count: 0 }
              }
            ]
          }
        },
        aggs: {
          zero_result_queries: {
            terms: {
              field: 'query.keyword',
              size: 100
            }
          }
        },
        size: 0
      }
    });

    const buckets: any[] =
      response.aggregations?.zero_result_queries?.buckets ?? [];
    return buckets.map((bucket: any) => bucket.key);
  }

  /** Inclusive range clause on `@timestamp` for the given bounds. */
  private timeRangeClause(timeRange: { from: Date; to: Date }) {
    return {
      range: {
        '@timestamp': {
          gte: timeRange.from,
          lte: timeRange.to
        }
      }
    };
  }
}
```

## 8. Real-time Indexing Pipeline

### Event-Driven Indexing
```typescript
import { EventEmitter } from 'events';

/** One queued change, tagged by the bulk action it maps to. */
type IndexerOperation =
  | { action: 'index'; document: any }
  | { action: 'update'; document: any }
  | { action: 'delete'; id: string };

/**
 * Collects document change events and writes them to Elasticsearch in bulk.
 *
 * Input events (emit these on the instance):
 * - `document_created` (document) — indexes the document under `document.id`
 * - `document_updated` (document) — partial update of `document.id`
 * - `document_deleted` (id) — deletes the document
 *
 * Output events:
 * - `batch_processed` ({ count }) — every operation in the batch succeeded
 * - `batch_failed` ({ operations, error }) — the request failed, or some items
 *   were rejected; `operations` holds the affected operations so a listener
 *   can retry them or route them to a dead-letter queue
 *
 * A batch is sent when it reaches `batchSize` operations or `batchTimeout`
 * milliseconds after the first queued operation, whichever comes first.
 */
export class RealTimeIndexer extends EventEmitter {
  private searchService: SearchService;
  private indexName: string;
  private batchSize: number;
  private batchTimeout: number;
  private batch: IndexerOperation[] = [];
  private batchTimer?: NodeJS.Timeout;

  /**
   * @param searchService - Service whose client performs the bulk requests.
   * @param options - Optional tuning; defaults match the previous hard-coded
   *   values (`products`, 100 operations, 5000 ms).
   */
  constructor(
    searchService: SearchService,
    options: {
      indexName?: string;
      batchSize?: number;
      batchTimeout?: number;
    } = {}
  ) {
    super();
    this.searchService = searchService;
    this.indexName = options.indexName ?? 'products';
    this.batchSize = options.batchSize ?? 100;
    this.batchTimeout = options.batchTimeout ?? 5000;
    this.setupEventHandlers();
  }

  /**
   * Sends any queued operations immediately. Call before shutdown so pending
   * changes are not lost with the timer.
   */
  async flush(): Promise<void> {
    await this.processBatch();
  }

  private setupEventHandlers(): void {
    this.on('document_created', this.handleDocumentCreated.bind(this));
    this.on('document_updated', this.handleDocumentUpdated.bind(this));
    this.on('document_deleted', this.handleDocumentDeleted.bind(this));
  }

  private async handleDocumentCreated(document: any): Promise<void> {
    this.addToBatch({
      action: 'index',
      document
    });
  }

  private async handleDocumentUpdated(document: any): Promise<void> {
    this.addToBatch({
      action: 'update',
      document
    });
  }

  private async handleDocumentDeleted(id: string): Promise<void> {
    this.addToBatch({
      action: 'delete',
      id
    });
  }

  /** Queues an operation and triggers or schedules the next flush. */
  private addToBatch(operation: IndexerOperation): void {
    this.batch.push(operation);

    if (this.batch.length >= this.batchSize) {
      // processBatch handles its own errors, so the promise is safe to detach.
      void this.processBatch();
    } else if (!this.batchTimer) {
      this.batchTimer = setTimeout(() => {
        void this.processBatch();
      }, this.batchTimeout);
    }
  }

  /** Translates one queued operation into its bulk request lines. */
  private toBulkLines(operation: IndexerOperation): any[] {
    switch (operation.action) {
      case 'index':
        return [
          { index: { _index: this.indexName, _id: operation.document.id } },
          operation.document
        ];
      case 'update':
        return [
          { update: { _index: this.indexName, _id: operation.document.id } },
          { doc: operation.document }
        ];
      case 'delete':
        return [{ delete: { _index: this.indexName, _id: operation.id } }];
    }
  }

  /**
   * Sends the queued operations as one bulk request. Never throws: failures
   * are logged and reported through the `batch_failed` event.
   */
  private async processBatch(): Promise<void> {
    if (this.batchTimer) {
      clearTimeout(this.batchTimer);
      this.batchTimer = undefined;
    }

    if (this.batch.length === 0) return;

    // Swap the queue out first so operations arriving during the request
    // start a fresh batch.
    const currentBatch = this.batch;
    this.batch = [];

    try {
      const body = currentBatch.flatMap(operation =>
        this.toBulkLines(operation)
      );

      const response: any = await this.searchService.client.bulk({
        refresh: 'wait_for',
        body
      });

      if (response.errors) {
        // Bulk response items are returned in request order, one per operation,
        // each keyed by its action name.
        const failures = response.items
          .map((item: any, position: number) => ({
            operation: currentBatch[position],
            result: Object.values(item)[0] as any
          }))
          .filter((entry: any) => entry.result?.error);

        console.error(
          `Batch of ${currentBatch.length} operations had ${failures.length} failures:`,
          failures
        );
        this.emit('batch_failed', {
          operations: failures.map((entry: any) => entry.operation),
          error: failures.map((entry: any) => entry.result.error)
        });
        return;
      }

      console.log(`Processed batch of ${currentBatch.length} operations`);
      this.emit('batch_processed', { count: currentBatch.length });
    } catch (error) {
      console.error('Error processing batch:', error);
      // Hand the whole batch to listeners for retry / dead-letter handling.
      this.emit('batch_failed', { operations: currentBatch, error });
    }
  }
}
```

## 9. Testing Strategy

### Comprehensive Test Suite
```typescript
// Jest's explicit globals are published as '@jest/globals'.
import { describe, test, expect, beforeAll, afterAll } from '@jest/globals';
import { SearchService } from '../src/search-service';
// NOTE(review): assumes AdvancedSearch and productMapping are exported from the
// same module as SearchService — adjust the path to your project layout.
import { AdvancedSearch, productMapping } from '../src/search-service';

/**
 * Integration tests for product search. They need a reachable Elasticsearch
 * node (see ELASTICSEARCH_URL) and must only be run against a disposable
 * cluster.
 */
describe('SearchService', () => {
  // searchProducts lives on AdvancedSearch, not on the base SearchService.
  let searchService: AdvancedSearch;
  const testIndex = 'test_products';
  // searchProducts queries 'products', so the test index is exposed under that
  // name through an alias. Creating the alias fails if a real index called
  // 'products' already exists, which keeps these tests away from real data.
  const searchAlias = 'products';

  // Bracket access reaches the client whether or not the field is declared private.
  const clientOf = (service: SearchService) => service['client'];

  beforeAll(async () => {
    searchService = new AdvancedSearch();
    await searchService.createIndex(testIndex, productMapping);
    await clientOf(searchService).indices.putAlias({
      index: testIndex,
      name: searchAlias
    });

    // Index test data
    const testProducts = [
      {
        id: '1',
        title: 'iPhone 14 Pro',
        description: 'Latest Apple smartphone',
        category: 'electronics',
        price: 999,
        brand: 'Apple',
        rating: 4.8
      },
      {
        id: '2',
        title: 'Samsung Galaxy S23',
        description: 'Android flagship phone',
        category: 'electronics',
        price: 899,
        brand: 'Samsung',
        rating: 4.6
      }
    ];

    // bulkIndex waits for a refresh, so the documents are searchable on return.
    await searchService.bulkIndex(testIndex, testProducts);
  });

  afterAll(async () => {
    // Deleting the index also removes its alias. ignore_unavailable keeps
    // teardown from failing when setup never created the index.
    await clientOf(searchService).indices.delete({
      index: testIndex,
      ignore_unavailable: true
    });
  });

  test('should search products by title', async () => {
    const results = await searchService.searchProducts({
      q: 'iPhone',
      size: 10
    });

    expect(results.hits.length).toBeGreaterThan(0);
    expect(results.hits[0].source.title).toContain('iPhone');
  });

  test('should filter by category', async () => {
    const results = await searchService.searchProducts({
      category: 'electronics',
      size: 10
    });

    expect(results.hits.length).toBe(2);
    results.hits.forEach(hit => {
      expect(hit.source.category).toBe('electronics');
    });
  });

  test('should handle fuzzy search', async () => {
    const results = await searchService.searchProducts({
      q: 'iPhoen', // intentional typo
      size: 10
    });

    expect(results.hits.length).toBeGreaterThan(0);
    expect(results.hits[0].source.title).toContain('iPhone');
  });

  test('should return aggregations', async () => {
    const results = await searchService.searchProducts({
      q: '',
      size: 10
    });

    expect(results.aggregations.categories).toBeDefined();
    expect(results.aggregations.brands).toBeDefined();
  });
});
```

## 10. Production Deployment

### Kubernetes Deployment (StatefulSet)
```yaml
# k8s-elasticsearch.yaml
# Three-replica Elasticsearch StatefulSet with one persistent volume per pod.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: elasticsearch
spec:
  # Governing Service that gives each pod a stable DNS name
  # (elasticsearch-0.elasticsearch, ...). That Service is not defined in this manifest.
  serviceName: elasticsearch
  replicas: 3
  selector:
    matchLabels:
      app: elasticsearch
  template:
    metadata:
      labels:
        app: elasticsearch
    spec:
      # NOTE(review): nothing here sets vm.max_map_count on the host (e.g. via an
      # init container) — confirm the nodes already satisfy Elasticsearch's requirement.
      containers:
      - name: elasticsearch
        image: docker.elastic.co/elasticsearch/elasticsearch:8.10.0
        ports:
        - containerPort: 9200
        - containerPort: 9300
        # NOTE(review): no xpack.security.* / TLS settings are supplied below —
        # confirm how security is configured for 8.x before deploying.
        env:
        - name: cluster.name
          value: "production-cluster"
        # Each node is named after its pod (elasticsearch-0, -1, -2).
        - name: node.name
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        # Peers are addressed through the per-pod DNS names of the governing Service.
        - name: discovery.seed_hosts
          value: "elasticsearch-0.elasticsearch,elasticsearch-1.elasticsearch,elasticsearch-2.elasticsearch"
        # Must match the node.name values above (the pod names).
        - name: cluster.initial_master_nodes
          value: "elasticsearch-0,elasticsearch-1,elasticsearch-2"
        # 2 GB JVM heap: half of the 4Gi container memory limit below.
        - name: ES_JAVA_OPTS
          value: "-Xms2g -Xmx2g"
        resources:
          # Memory request equals the limit.
          requests:
            memory: "4Gi"
            cpu: "1"
          limits:
            memory: "4Gi"
            cpu: "2"
        volumeMounts:
        - name: data
          mountPath: /usr/share/elasticsearch/data
  # One 100Gi volume claim is created per replica and mounted as the data directory.
  volumeClaimTemplates:
  - metadata:
      name: data
    spec:
      accessModes: ["ReadWriteOnce"]
      # NOTE(review): assumes a StorageClass named "fast-ssd" exists in the cluster — confirm.
      storageClassName: "fast-ssd"
      resources:
        requests:
          storage: 100Gi
```

## Implementation Checklist

- [ ] Set up Elasticsearch cluster with proper configuration
- [ ] Design index mappings for your data structure
- [ ] Implement search service with TypeScript client
- [ ] Create advanced search queries with filters and aggregations
- [ ] Build auto-complete and suggestion features
- [ ] Optimize query performance and indexing strategies
- [ ] Implement search analytics and monitoring
- [ ] Set up real-time indexing pipeline
- [ ] Write comprehensive tests for search functionality
- [ ] Deploy to production with monitoring and alerting
- [ ] Configure index lifecycle management
- [ ] Implement security and access controls

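This guide provides a foundation for building sophisticated search engines with Elasticsearch, covering everything from basic setup to advanced features and deployment strategies. Before using it in production, complete the last two checklist items in particular: the example configurations above either disable security (`xpack.security.enabled=false`) or omit TLS and authentication settings entirely.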
Elasticsearch Search Engine Development - Cursor IDE AI Rule