Source code for cw.audiences.models

"""
Audience segmentation models.

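This module provides database models for:
- Geographic segmentation: Regions, Countries, Languages
- Compositional insights at each level (Region → Country → Language)
- Non-geographic segmentation: Segments and the Personas that combine them
- World Values Survey (WVS) profiles per country and wave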

Models relocated from core app to establish dedicated audiences namespace.
"""

from django.db import models



[docs]
class Region(models.Model):
    """Cultural/market grouping with regional insights.

    Examples: North America, Nordics, DACH, LATAM

    Regional insights capture broad cultural patterns that apply across
    multiple countries in the region (e.g., Nordic minimalism, North American
    directness).

    Countries are linked through the ``regions`` many-to-many field declared
    on ``Country``; from a region they are reachable as ``countries``.
    """

    # Unique short identifier; it is also what ``__str__`` prints in parentheses.
    code = models.CharField(
        max_length=20,
        unique=True,
        help_text="Short code (e.g., 'NA', 'NORDICS', 'DACH')",
    )
    # Human-readable label; the default ordering in ``Meta`` sorts on it.
    name = models.CharField(
        max_length=100,
        help_text="Display name (e.g., 'North America')",
    )
    # Optional free text (``blank=True``).
    description = models.TextField(
        blank=True,
        help_text="Region description and scope",
    )
    # ``default=list`` passes the callable, so every new row gets its own
    # empty list rather than sharing one mutable default.
    insights = models.JSONField(
        default=list,
        help_text="Regional cultural patterns: [{heading, points[]}]",
    )
    # Nothing in this module reads the flag — presumably callers filter on it.
    is_active = models.BooleanField(default=True)
    # Set once on insert / refreshed on every save, respectively.
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        # Explicit table name with a ``core_`` prefix; the module docstring
        # notes that these models were relocated from the core app.
        db_table = "core_region"
        ordering = ["name"]
        verbose_name = "Region"
        verbose_name_plural = "Regions"

    def __str__(self) -> str:
        """Return the display name followed by the code in parentheses."""
        return f"{self.name} ({self.code})"


[docs]
    def insights_as_markdown(self) -> str:
        """Render structured insights as markdown.

        Each entry of ``self.insights`` is expected to be a mapping with a
        ``heading`` string and a ``points`` list. Every entry with a
        non-empty heading becomes a ``### heading`` line followed by one
        ``- point`` bullet per point; rendered sections are separated by a
        blank line.

        Entries without a heading are omitted together with their points.
        Entries that are not mappings are skipped, and a ``points`` value of
        ``None`` (JSON ``null``) is treated as an empty list, so hand-edited
        JSON does not make rendering raise.

        Returns:
            The markdown text, or ``""`` when there is nothing to render.
        """
        if not self.insights:
            return ""

        rendered = []
        for entry in self.insights:
            # The JSON column is free-form; ignore anything that is not a mapping.
            if not isinstance(entry, dict):
                continue

            heading = entry.get("heading", "")
            if not heading:
                continue

            # ``.get("points", [])`` would still hand back None for an explicit
            # null, so normalise falsy values to an empty list instead.
            points = entry.get("points") or []
            lines = [f"### {heading}", *(f"- {point}" for point in points)]
            rendered.append("\n".join(lines))

        return "\n\n".join(rendered)





[docs]
class Country(models.Model):
    """Political/regulatory entity with country-specific insights.

    Examples: United States, Canada, Sweden, Switzerland

    Country insights capture regulatory requirements and local cultural
    nuances specific to that country.

    A country can belong to several regions and list several languages; both
    relations go through explicit through models (``CountryRegion`` and
    ``CountryLanguage``).
    """

    # Two-character unique code; ``max_length=2`` matches the alpha-2 format
    # named in the help text.
    code = models.CharField(
        max_length=2,
        unique=True,
        help_text="ISO 3166-1 alpha-2 code (e.g., 'US', 'CA', 'SE')",
    )
    name = models.CharField(
        max_length=100,
        help_text="Official country name",
    )
    # Optional (nullable). ``PROTECT`` blocks deleting a Language while a
    # country still points at it as its default. Referenced by name because
    # ``Language`` is defined later in the module.
    # NOTE(review): ``CountryLanguage.is_primary`` also marks a primary
    # language; nothing here keeps the two in sync — confirm which is canonical.
    default_language = models.ForeignKey(
        "Language",
        on_delete=models.PROTECT,
        related_name="default_for_countries",
        null=True,
        blank=True,
        help_text="Primary/default language for this country",
    )
    # ``default=list`` passes the callable, so each row gets a fresh list.
    insights = models.JSONField(
        default=list,
        help_text="Country-specific regulatory and cultural rules: [{heading, points[]}]",
    )
    notes = models.TextField(blank=True)
    # Nothing in this module reads the flag — presumably callers filter on it.
    is_active = models.BooleanField(default=True)
    # Set once on insert / refreshed on every save, respectively.
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # M2M relationships (both use explicit through models defined below)
    regions = models.ManyToManyField(
        Region,
        through="CountryRegion",
        related_name="countries",
        blank=True,
        help_text="Regions this country belongs to",
    )
    languages = models.ManyToManyField(
        "Language",
        through="CountryLanguage",
        related_name="countries",
        blank=True,
        help_text="All languages spoken in this country",
    )

    class Meta:
        # Explicit table name with a ``core_`` prefix; the module docstring
        # notes that these models were relocated from the core app.
        db_table = "core_country"
        ordering = ["name"]
        verbose_name = "Country"
        verbose_name_plural = "Countries"

    def __str__(self) -> str:
        """Return the country name followed by its code in parentheses."""
        return f"{self.name} ({self.code})"


[docs]
    def insights_as_markdown(self) -> str:
        """Render structured insights as markdown.

        Each entry of ``self.insights`` is expected to be a mapping with a
        ``heading`` string and a ``points`` list. Every entry with a
        non-empty heading becomes a ``### heading`` line followed by one
        ``- point`` bullet per point; rendered sections are separated by a
        blank line.

        Entries without a heading are omitted together with their points.
        Entries that are not mappings are skipped, and a ``points`` value of
        ``None`` (JSON ``null``) is treated as an empty list, so hand-edited
        JSON does not make rendering raise.

        Returns:
            The markdown text, or ``""`` when there is nothing to render.
        """
        if not self.insights:
            return ""

        rendered = []
        for entry in self.insights:
            # The JSON column is free-form; ignore anything that is not a mapping.
            if not isinstance(entry, dict):
                continue

            heading = entry.get("heading", "")
            if not heading:
                continue

            # ``.get("points", [])`` would still hand back None for an explicit
            # null, so normalise falsy values to an empty list instead.
            points = entry.get("points") or []
            lines = [f"### {heading}", *(f"- {point}" for point in points)]
            rendered.append("\n".join(lines))

        return "\n\n".join(rendered)



[docs]
    def get_primary_languages(self):
        """Return this country's languages whose through row is flagged primary.

        The lookup follows the ``CountryLanguage`` through model and keeps
        only rows with ``is_primary=True``.
        """
        primary_only = {"countrylanguage__is_primary": True}
        return self.languages.filter(**primary_only)





[docs]
class Language(models.Model):
    """Language variant with locale code and LLM model recommendations.

    Examples: en-US, fr-CA, de-CH, es-MX

    Each language has locale-specific insights and recommended LLM models
    for generating content in that language variant.

    Exactly one ``primary_model`` is required; any number of alternatives can
    be attached through ``LanguageAlternativeModel``.
    """

    # Unique locale identifier; also printed by ``__str__``.
    code = models.CharField(
        max_length=10,
        unique=True,
        help_text="ISO 639 + country locale (e.g., 'en-US', 'fr-CA')",
    )
    name = models.CharField(
        max_length=100,
        help_text="Display name (e.g., 'English (United States)')",
    )
    # Optional; stored as an empty string when not provided. Indexed together
    # with ``is_active`` (see ``Meta.indexes``).
    base_language = models.CharField(
        max_length=10,
        blank=True,
        default="",
        help_text="ISO 639-1 base language code (e.g., 'en', 'fr', 'de')",
    )
    # Required (no ``null=True``). ``PROTECT`` blocks deleting an LLMModel
    # that is still some language's primary model.
    primary_model = models.ForeignKey(
        "core.LLMModel",
        on_delete=models.PROTECT,
        related_name="primary_for_languages",
        help_text="Recommended LLM model for this language",
    )
    alternative_models = models.ManyToManyField(
        "core.LLMModel",
        through="LanguageAlternativeModel",
        related_name="alternative_for_languages",
        blank=True,
        help_text="Alternative LLM models that can handle this language",
    )
    # ``default=list`` passes the callable, so each row gets a fresh list.
    insights = models.JSONField(
        default=list,
        help_text="Language-specific localization guidance: [{heading, points[]}]",
    )
    notes = models.TextField(
        blank=True,
        help_text="Notes about language-specific considerations.",
    )
    is_active = models.BooleanField(
        default=True,
        help_text="Whether this language is available for adaptations.",
    )
    # Set once on insert / refreshed on every save, respectively.
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        # Explicit table name with a ``core_`` prefix; the module docstring
        # notes that these models were relocated from the core app.
        db_table = "core_language"
        ordering = ["name"]
        verbose_name = "Language"
        verbose_name_plural = "Languages"
        # Composite index on (base_language, is_active) — presumably for
        # "active variants of a base language" lookups; confirm against callers.
        indexes = [
            models.Index(fields=["base_language", "is_active"], name="lang_base_active_idx"),
        ]

    def __str__(self) -> str:
        """Return the display name followed by the locale code in parentheses."""
        return f"{self.name} ({self.code})"


[docs]
    def insights_as_markdown(self) -> str:
        """Render structured insights as markdown.

        Each entry of ``self.insights`` is expected to be a mapping with a
        ``heading`` string and a ``points`` list. Every entry with a
        non-empty heading becomes a ``### heading`` line followed by one
        ``- point`` bullet per point; rendered sections are separated by a
        blank line.

        Entries without a heading are omitted together with their points.
        Entries that are not mappings are skipped, and a ``points`` value of
        ``None`` (JSON ``null``) is treated as an empty list, so hand-edited
        JSON does not make rendering raise.

        Returns:
            The markdown text, or ``""`` when there is nothing to render.
        """
        if not self.insights:
            return ""

        rendered = []
        for entry in self.insights:
            # The JSON column is free-form; ignore anything that is not a mapping.
            if not isinstance(entry, dict):
                continue

            heading = entry.get("heading", "")
            if not heading:
                continue

            # ``.get("points", [])`` would still hand back None for an explicit
            # null, so normalise falsy values to an empty list instead.
            points = entry.get("points") or []
            lines = [f"### {heading}", *(f"- {point}" for point in points)]
            rendered.append("\n".join(lines))

        return "\n\n".join(rendered)



[docs]
    def get_all_models(self):
        """Return a queryset with the primary model and every alternative model.

        The two conditions are OR-ed in a single query; ``distinct()`` removes
        the duplicate rows the many-to-many join can produce.
        """
        # Imported inside the method, as in the original — presumably to avoid
        # a circular import with the core app; confirm before hoisting.
        from django.db.models import Q

        from cw.core.models import LLMModel

        is_primary = Q(pk=self.primary_model_id)
        is_alternative = Q(alternative_for_languages=self)
        return LLMModel.objects.filter(is_primary | is_alternative).distinct()




# M2M Through Tables



[docs]
class CountryRegion(models.Model):
    """Many-to-many through table for Country ↔ Region relationship.

    Allows countries to belong to multiple regions (e.g., Switzerland in
    both DACH and EU-WEST).

    Used as the ``through`` model of ``Country.regions``. A given
    (country, region) pair can appear only once.
    """

    # Deleting either side removes the mapping row (``CASCADE``).
    country = models.ForeignKey(Country, on_delete=models.CASCADE)
    region = models.ForeignKey(Region, on_delete=models.CASCADE)

    class Meta:
        db_table = "core_country_region"
        # One row per (country, region) pair.
        unique_together = [["country", "region"]]
        verbose_name = "Country-Region Mapping"
        verbose_name_plural = "Country-Region Mappings"

    def __str__(self) -> str:
        """Return ``<country code> → <region code>``."""
        # Reads both related objects, so each may cost a query if not cached.
        return f"{self.country.code} → {self.region.code}"




[docs]
class CountryLanguage(models.Model):
    """Through model linking a Country to one of its Languages.

    Each row records that a language is spoken in a country. The
    ``is_primary`` flag marks the country's default/official language.
    A given (country, language) pair can appear only once.
    """

    # Deleting either side removes the mapping row (``CASCADE``).
    country = models.ForeignKey(Country, on_delete=models.CASCADE)
    language = models.ForeignKey(Language, on_delete=models.CASCADE)
    is_primary = models.BooleanField(
        default=False,
        help_text="Primary/official language for this country",
    )

    class Meta:
        db_table = "core_country_language"
        # One row per (country, language) pair.
        unique_together = [["country", "language"]]
        verbose_name = "Country-Language Mapping"
        verbose_name_plural = "Country-Language Mappings"

    def __str__(self):
        """Return ``<country code> → <language code>``, tagged when primary."""
        label = f"{self.country.code} → {self.language.code}"
        if self.is_primary:
            label += " (primary)"
        return label




[docs]
class LanguageAlternativeModel(models.Model):
    """Many-to-many through table for Language ↔ LLMModel alternatives.

    Tracks alternative LLM models that can handle a specific language,
    separate from the primary model recommendation.

    Used as the ``through`` model of ``Language.alternative_models``. A given
    (language, llmmodel) pair can appear only once.
    """

    # Deleting either side removes the mapping row (``CASCADE``).
    language = models.ForeignKey(Language, on_delete=models.CASCADE)
    # String reference to the model in the ``core`` app.
    llmmodel = models.ForeignKey("core.LLMModel", on_delete=models.CASCADE)

    class Meta:
        db_table = "core_language_alternative_model"
        # One row per (language, llmmodel) pair.
        unique_together = [["language", "llmmodel"]]
        verbose_name = "Language Alternative Model"
        verbose_name_plural = "Language Alternative Models"

    def __str__(self) -> str:
        """Return ``<language code> → <LLM model name>``."""
        # Reads both related objects, so each may cost a query if not cached.
        return f"{self.language.code} → {self.llmmodel.name}"



# Non-Geographic Segmentation Models



[docs]
class Segment(models.Model):
    """Non-geographic audience segment (demographic, behavioral, psychographic).

    Examples:
    - Demographic: Household Income → Middle-Income
    - Behavioral: Usage Pattern → First-Time Users
    - Psychographic: Emotional Driver → Nostalgia

    Geographic segmentation uses existing Region/Country/Language models.

    A segment is identified by its (category, vector, value) triple, which
    must be unique.
    """

    # (stored value, display label) pairs for ``category``.
    CATEGORY_CHOICES = [
        ("DEMOGRAPHIC", "Demographic"),
        ("BEHAVIORAL", "Behavioral"),
        ("PSYCHOGRAPHIC", "Psychographic"),
    ]

    category = models.CharField(
        max_length=20,
        choices=CATEGORY_CHOICES,
        help_text="Segment category",
    )
    vector = models.CharField(
        max_length=100,
        help_text="Dimension being segmented (e.g., 'Household Income', 'Emotional Driver')",
    )
    value = models.CharField(
        max_length=100,
        help_text="Position on the dimension (e.g., 'Middle-Income', 'Escapism')",
    )
    description = models.TextField(
        blank=True,
        help_text="Optional longer description",
    )
    # ``default=list`` passes the callable, so each row gets a fresh list.
    insights = models.JSONField(
        default=list,
        help_text="Structured insights: [{heading, points[]}]",
    )
    # Nothing in this module reads the flag — presumably callers filter on it.
    is_active = models.BooleanField(default=True)
    # Set once on insert / refreshed on every save, respectively.
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        # Unlike the geographic models, this table uses the ``audiences_`` prefix.
        db_table = "audiences_segment"
        ordering = ["category", "vector", "value"]
        verbose_name = "Segment"
        verbose_name_plural = "Segments"
        # The same triple that defines the ordering must also be unique.
        unique_together = [["category", "vector", "value"]]

    def __str__(self) -> str:
        """Return ``<category label>: <vector> → <value>``."""
        # ``get_category_display`` is the accessor Django generates for a
        # field with ``choices``; it returns the human-readable label.
        return f"{self.get_category_display()}: {self.vector} → {self.value}"


[docs]
    def insights_as_markdown(self) -> str:
        """Render structured insights as markdown.

        Each entry of ``self.insights`` is expected to be a mapping with a
        ``heading`` string and a ``points`` list. Every entry with a
        non-empty heading becomes a ``### heading`` line followed by one
        ``- point`` bullet per point; rendered sections are separated by a
        blank line.

        Entries without a heading are omitted together with their points.
        Entries that are not mappings are skipped, and a ``points`` value of
        ``None`` (JSON ``null``) is treated as an empty list, so hand-edited
        JSON does not make rendering raise.

        Returns:
            The markdown text, or ``""`` when there is nothing to render.
        """
        if not self.insights:
            return ""

        rendered = []
        for entry in self.insights:
            # The JSON column is free-form; ignore anything that is not a mapping.
            if not isinstance(entry, dict):
                continue

            heading = entry.get("heading", "")
            if not heading:
                continue

            # ``.get("points", [])`` would still hand back None for an explicit
            # null, so normalise falsy values to an empty list instead.
            points = entry.get("points") or []
            lines = [f"### {heading}", *(f"- {point}" for point in points)]
            rendered.append("\n".join(lines))

        return "\n\n".join(rendered)





[docs]
class Persona(models.Model):
    """Named collection of segments representing a target audience profile.

    A Persona combines geographic segments (Region/Country/Language) with
    non-geographic segments (Demographic/Behavioral/Psychographic) to create
    a complete audience profile.

    Example: "Budget-Conscious First-Timer"
    - Geographic: North America / United States / English (en-US)
    - Demographic: Household Income → Middle-Income
    - Behavioral: Usage Pattern → First-Time Users
    - Psychographic: Emotional Driver → Nostalgia

    All three geographic links are optional, and each is declared
    independently of the others.
    """

    # No ``unique=True``: two personas may share a name.
    name = models.CharField(
        max_length=200,
        help_text="Persona name (e.g., 'Budget-Conscious First-Timer')",
    )
    description = models.TextField(
        blank=True,
        help_text="Optional description of this persona",
    )

    # Geographic segments (reuse existing models)
    # Each link is nullable, and ``PROTECT`` blocks deleting a
    # Region/Country/Language that a persona still references.
    # NOTE(review): nothing here checks that the chosen country belongs to
    # the chosen region, or the language to the country — confirm whether
    # that is validated elsewhere.
    region = models.ForeignKey(
        Region,
        on_delete=models.PROTECT,
        null=True,
        blank=True,
        related_name="personas",
        help_text="Target region",
    )
    country = models.ForeignKey(
        Country,
        on_delete=models.PROTECT,
        null=True,
        blank=True,
        related_name="personas",
        help_text="Target country",
    )
    language = models.ForeignKey(
        Language,
        on_delete=models.PROTECT,
        null=True,
        blank=True,
        related_name="personas",
        help_text="Target language",
    )

    # Non-geographic segments (linked through the explicit ``PersonaSegment`` model)
    segments = models.ManyToManyField(
        Segment,
        through="PersonaSegment",
        related_name="personas",
        blank=True,
        help_text="Demographic, behavioral, and psychographic segments",
    )

    # Nothing in this module reads the flag — presumably callers filter on it.
    is_active = models.BooleanField(default=True)
    # Set once on insert / refreshed on every save, respectively.
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = "audiences_persona"
        ordering = ["name"]
        verbose_name = "Persona"
        verbose_name_plural = "Personas"

    def __str__(self):
        """Return the persona name, plus its geographic codes when any are set.

        The codes of the region, country and language that are present are
        joined with ``/`` in that order and appended in parentheses, e.g.
        ``"Budget-Conscious First-Timer (NA/US/en-US)"``.
        """
        geo_codes = [
            target.code
            for target in (self.region, self.country, self.language)
            if target
        ]

        pieces = [self.name]
        if geo_codes:
            pieces.append(f"({'/'.join(geo_codes)})")
        return " ".join(pieces)


[docs]
    def segment_count(self) -> int:
        """Return how many non-geographic segments are attached to this persona."""
        attached = self.segments
        return attached.count()





[docs]
class PersonaSegment(models.Model):
    """Many-to-many through table for Persona ↔ Segment relationship.

    Explicit through table for future extensibility.
    Rows are ordered by the segment's category, vector, and value (see
    ``Meta.ordering``). A given (persona, segment) pair can appear only once.
    """

    # Deleting either side removes the mapping row (``CASCADE``).
    persona = models.ForeignKey(Persona, on_delete=models.CASCADE)
    segment = models.ForeignKey(Segment, on_delete=models.CASCADE)

    class Meta:
        db_table = "audiences_persona_segment"
        # One row per (persona, segment) pair.
        unique_together = [["persona", "segment"]]
        # Ordering spans the relation, so default queries on this model join
        # to the segment table.
        ordering = ["segment__category", "segment__vector", "segment__value"]
        verbose_name = "Persona Segment"
        verbose_name_plural = "Persona Segments"

    def __str__(self) -> str:
        """Return ``<persona name> → <segment>`` using the segment's own ``__str__``."""
        return f"{self.persona.name} → {self.segment}"



# ---------------------------------------------------------------------------
# World Values Survey Data
# ---------------------------------------------------------------------------



[docs]
class WVSProfile(models.Model):
    """World Values Survey (WVS) variable means for one country and one wave.

    All variable means are kept in a single JSON object so that lookups can
    be made dynamically by theme. The pipeline can query specific variables
    based on script concept (e.g., environmental themes → V33, V34, V81).

    ``raw_data`` holds ``{variable_code: mean_value}`` pairs for all ~945
    variables of a given country-wave combination. There is at most one
    profile per (country, wave) pair.
    """

    # Profiles are removed together with their country (``CASCADE``).
    country = models.ForeignKey(
        Country,
        on_delete=models.CASCADE,
        related_name="wvs_profiles",
        help_text="Country this profile belongs to",
    )
    # The 1-7 range is stated only in the help text; no validator enforces it.
    wave = models.IntegerField(
        help_text="WVS wave number (1-7)",
    )
    # ``default=dict`` passes the callable, so each row gets a fresh dict.
    raw_data = models.JSONField(
        default=dict,
        help_text="All WVS variable means: {variable_code: value}",
    )
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = "audiences_wvs_profile"
        unique_together = [["country", "wave"]]
        # Country code ascending, then highest wave number first.
        ordering = ["country__code", "-wave"]
        verbose_name = "WVS Profile"
        verbose_name_plural = "WVS Profiles"

    def __str__(self):
        """Return ``<country code> — Wave <n>``."""
        country_code = self.country.code
        return f"{country_code} — Wave {self.wave}"

    @property
    def variable_count(self) -> int:
        """Return how many variables in ``raw_data`` have a non-null value."""
        data = self.raw_data
        if not data:
            return 0

        populated = [value for value in data.values() if value is not None]
        return len(populated)