from __future__ import annotations

from dataclasses import dataclass
from enum import Enum

import itables
import numpy as np
import pandas as pd
from IPython.display import Markdown

# Opt in to pandas' future behaviour of not silently downcasting dtypes
# (relevant to the `.replace(...)` calls used later in this notebook).
pd.set_option('future.no_silent_downcasting', True)
# Must run before any DataFrame is displayed so that itables can take over
# DataFrame rendering in the notebook.
itables.init_notebook_mode()

# Raw survey export, one row per respondent. The analysis below reads the
# "Rank options [<option>]" and "Which options do you like? [<option>]" columns.
responses = pd.read_csv("survey_responses.csv")
responses

class InterpolationOption(Enum):
    """The string-interpolation designs respondents were asked about.

    Each member's value is the label that appears inside the square brackets
    of the survey's column headers (see `rank_col` / `approval_col`).
    """

    IMPLICIT_BUILDER = "implicit-builder"
    EXPLICIT = "explicit"
    IMPLICIT_ONLY_STRING = "implicit-only-string"
    IMPLICIT_NO_BUILDER = "implicit-no-builder"
    EXTENSIBLE_TH = "extensible-th"
    EXTENSIBLE_HASCLASS = "extensible-hasclass"

def rank_col(option: str | InterpolationOption) -> str:
    """Return the header of the column holding the rank given to `option`.

    `option` may be an `InterpolationOption` member or the raw label string
    used in the survey (e.g. ``"implicit-builder"``).
    """
    label = option.value if isinstance(option, InterpolationOption) else option
    return f"Rank options [{label}]"

def approval_col(option: str | InterpolationOption) -> str:
    """Return the header of the column holding the approval rating of `option`.

    `option` may be an `InterpolationOption` member or the raw label string
    used in the survey (e.g. ``"implicit-builder"``).
    """
    label = option.value if isinstance(option, InterpolationOption) else option
    return f"Which options do you like? [{label}]"

@dataclass(frozen=True)
class Ballot:
    """One respondent's ranked-choice ballot.

    `ranking` lists the options the respondent ranked, best first
    (`ranking[0]` is the top choice). Options left unranked are absent.
    """

    ranking: list[InterpolationOption]

    @classmethod
    def load(cls, data: pd.Series) -> Ballot:
        """Build a ballot from one row of the survey responses.

        Reads the `rank_col` entry of every option, skips those whose rank is
        NaN, and orders the rest by ascending rank number. Ranks are converted
        with `int()` and used as dictionary keys, so when two options share a
        rank only the one declared later in the enum is kept.
        """
        option_by_rank: dict[int, InterpolationOption] = {}
        for option in InterpolationOption.__members__.values():
            rank = data[rank_col(option)]
            if np.isnan(rank):
                continue
            option_by_rank[int(rank)] = option

        ordered = [option_by_rank[rank] for rank in sorted(option_by_rank)]
        return cls(ranking=ordered)

    def get_top(self) -> InterpolationOption | None:
        """Return the first-ranked option, or None for an empty ballot."""
        if not self.ranking:
            return None
        return self.ranking[0]

    def get_rank(self, option: InterpolationOption) -> int | None:
        """Return the 0-based position of `option`, or None if it is unranked."""
        if option in self.ranking:
            return self.ranking.index(option)
        return None

    def get_next(self, option: InterpolationOption) -> InterpolationOption | None:
        """Return the option ranked directly below `option`.

        Returns None when `option` is unranked or is the last entry.
        """
        position = self.get_rank(option)
        if position is None:
            return None
        following = position + 1
        if following >= len(self.ranking):
            return None
        return self.ranking[following]

@dataclass(frozen=True)
class Scoreboard:
    """State of an instant-runoff count.

    `votes` maps every option still in the race to the ballots currently
    counted for it.
    """

    votes: dict[InterpolationOption, list[Ballot]]

    @classmethod
    def init(cls, ballots: list[Ballot]) -> Scoreboard:
        """Create the first-round scoreboard.

        Every option gets an entry (possibly empty); each ballot is counted
        for its top-ranked option. Ballots with an empty ranking are not
        counted for anyone.
        """
        votes = {
            option: [ballot for ballot in ballots if ballot.get_top() == option]
            for option in InterpolationOption.__members__.values()
        }
        return cls(votes=votes)

    def get_tally(self) -> RoundTally:
        """Return the number of ballots per remaining option and their total."""
        tally = {option: len(ballots) for option, ballots in self.votes.items()}
        return RoundTally(tally=tally, total=sum(tally.values()))

    def remove(self, loser: InterpolationOption) -> Scoreboard:
        """Return a new scoreboard with `loser` eliminated.

        Each of the loser's ballots is transferred to the next option on that
        ballot which is still in the race; ballots with no such option are
        dropped. This scoreboard is left untouched.

        Raises:
            KeyError: if `loser` is not in the race.
        """
        loser_ballots = self.votes[loser]
        # Copy every per-option list: a shallow dict copy would share the lists
        # with `self`, so appending transferred ballots would also change the
        # previous round's scoreboard.
        new_votes = {
            option: list(ballots)
            for option, ballots in self.votes.items()
            if option != loser
        }

        for ballot in loser_ballots:
            # Walk down the ballot until reaching an option still in the race.
            new_vote = ballot.get_next(loser)
            while new_vote is not None and new_vote not in new_votes:
                new_vote = ballot.get_next(new_vote)
            if new_vote is not None:
                new_votes[new_vote].append(ballot)

        return Scoreboard(votes=new_votes)

@dataclass(frozen=True)
class RoundTally:
    """Vote counts for a single round of the count.

    `tally` maps each remaining option to its number of ballots and `total`
    is the number of ballots counted in this round.
    """

    tally: dict[InterpolationOption, int]
    total: int

    def to_pandas(self) -> pd.DataFrame:
        """Return the tally as a DataFrame indexed by option label.

        The frame has a single ``count`` column sorted from most to fewest
        votes; options with equal counts keep the order they have in `tally`.
        An empty tally yields an empty frame instead of raising.
        """
        frame = pd.DataFrame({
            "option": [option.value for option in self.tally],
            "count": list(self.tally.values()),
        })
        # A stable sort keeps tied options in a deterministic order.
        return frame.set_index("option").sort_values(
            "count", ascending=False, kind="stable"
        )

    def get_winner(self) -> InterpolationOption | None:
        """Return the option holding a strict majority of `total`, if any."""
        majority = self.total / 2
        for option, count in self.tally.items():
            if count > majority:
                return option
        return None

    def get_loser(self) -> InterpolationOption:
        """Return the option with the fewest votes.

        Ties go to the option that comes first in `tally`.

        Raises:
            ValueError: if the tally is empty.
        """
        return min(self.tally.items(), key=lambda pair: pair[1])[0]

# Parse every survey row into a ranked-choice ballot.
ballots = [
    Ballot.load(data)
    for _, data in responses.iterrows()
]

# Instant-runoff count: in each round, show the tally and stop as soon as one
# option holds a strict majority; otherwise eliminate the option with the
# fewest votes and transfer its ballots. There is at most one round per option.
scoreboard = Scoreboard.init(ballots)
for round_num in range(len(InterpolationOption.__members__)):
    display(Markdown(f"---\n**Round {round_num + 1}**"))
    round_tally = scoreboard.get_tally()
    display(round_tally.to_pandas())
    winner = round_tally.get_winner()
    if winner:
        display(Markdown(f"Winner: **{winner.value}**"))
        break

    # No majority yet: drop the last-place option and redistribute its ballots.
    loser = round_tally.get_loser()
    scoreboard = scoreboard.remove(loser)

# One column per option (named by its label) holding each respondent's approval
# answer; unanswered cells are shown as "N/A" so they are counted as their own
# category below.
approvals = pd.DataFrame({
    option.value: responses[approval_col(option)].replace(np.nan, "N/A")
    for option in InterpolationOption.__members__.values()
})
approvals

# Count how often each approval level was chosen for each option: one row per
# option, one column per level, ordered by "Really happy" votes.
approval_counts = (
    approvals.apply(lambda s: s.value_counts())
    .reindex([
        "Really unhappy",
        "Somewhat unhappy",
        "Ambivalent",
        "Somewhat happy",
        "Really happy",
        "N/A",
    ])
    # A level nobody chose for an option comes back as NaN; it means zero votes.
    # Without this, the sums below would turn into NaN for that option.
    .fillna(0)
    .astype(int)
    .transpose()
    .sort_values("Really happy", ascending=False)
)
approval_counts

# Collapse the six levels into three buckets; "N/A" is counted as ambivalent.
pd.DataFrame({
    "Unhappy": approval_counts["Really unhappy"] + approval_counts["Somewhat unhappy"],
    "Ambivalent": approval_counts["Ambivalent"] + approval_counts["N/A"],
    "Happy": approval_counts["Really happy"] + approval_counts["Somewhat happy"],
}).sort_values("Happy", ascending=False)

SQL."select * from users where name = ${name}"

SQL.fromParts
  [ SQL.fromString "select * from users where name = "
  , SQL.interpolate name
  ]

# Rows that prefer implicit-builder to implicit-no-builder: implicit-builder is
# ranked, and implicit-no-builder is either unranked or ranked lower (a larger
# rank number).
prefer_ib_rows = responses[
    ~np.isnan(responses[rank_col("implicit-builder")])
    & (
        np.isnan(responses[rank_col("implicit-no-builder")])
        | (responses[rank_col("implicit-builder")] < responses[rank_col("implicit-no-builder")])
    )
]

# For those respondents, count which option comes directly after
# implicit-builder on their ballot ("N/A" when implicit-builder was ranked last).
(
    prefer_ib_rows
    .apply(Ballot.load, axis="columns")
    .apply(lambda ballot: ballot.get_next(InterpolationOption.IMPLICIT_BUILDER))
    .apply(lambda option: option.value if option else "N/A")
    .value_counts()
)

# Numeric encoding of the approval answers (higher means happier), so that
# approvals can be compared with < / >= in the consistency checks below.
ENCODED_APPROVALS = {
    "Really unhappy": 1,
    "Somewhat unhappy": 2,
    "Ambivalent": 3,
    "Somewhat happy": 4,
    "Really happy": 5,
}

def is_inconsistent(row):
    """Return True if a respondent's ranking contradicts their approvals.

    A response is inconsistent when some option is ranked above another but
    has a strictly lower approval rating. Ranked options with no approval
    answer are skipped, so a missing answer between two rated options cannot
    hide an inversion (comparisons against NaN are always False).

    Args:
        row: one row of the survey responses, containing the rank and
            approval columns of every option.
    """
    ballot = Ballot.load(row)
    scores = row.replace(ENCODED_APPROVALS)

    # Approval scores in ballot order (best rank first), rated options only.
    ordered_approvals = [
        score
        for option in ballot.ranking
        if not pd.isna(score := scores[approval_col(option)])
    ]
    # With no gaps in the list, checking neighbours is enough to detect any
    # lower-ranked option that is approved more than a higher-ranked one.
    return any(a < b for a, b in zip(ordered_approvals, ordered_approvals[1:]))

# Apply the consistency check to every row and show the offending responses.
inconsistent_responses = responses[responses.apply(is_inconsistent, axis="columns")]
display(Markdown(f"Number of inconsistent responses: {len(inconsistent_responses)}"))
display(inconsistent_responses)

# Keep only the rank/approval columns of implicit-builder and
# implicit-no-builder, and only the respondents who answered all four of them.
ib_responses = responses[[
    col for col in responses
    if "implicit-no-builder" in col or "implicit-builder" in col
]][
    ~responses[rank_col("implicit-builder")].isna()
    &
    ~responses[rank_col("implicit-no-builder")].isna()
    &
    ~responses[approval_col("implicit-builder")].isna()
    &
    ~responses[approval_col("implicit-no-builder")].isna()
]

display(Markdown(f"Number of people who ranked both implicit-builder and implicit-no-builder: {len(ib_responses)}"))

# Ranks (smaller is better) and numerically encoded approvals (larger is
# better) for the two options, abbreviated IB / INB.
ib_rank = ib_responses[rank_col("implicit-builder")]
inb_rank = ib_responses[rank_col("implicit-no-builder")]
ib_approval = ib_responses[approval_col("implicit-builder")].replace(ENCODED_APPROVALS)
inb_approval = ib_responses[approval_col("implicit-no-builder")].replace(ENCODED_APPROVALS)
# Inconsistent: the approvals differ, but the option ranked higher is not the
# one approved more.
inconsistent_ib_responses = ib_responses[
    (ib_approval != inb_approval)
    & ((ib_rank < inb_rank) != (ib_approval >= inb_approval))
]

display(Markdown(f"Number of people with inconsistent IB/INB ranking and approval: {len(inconsistent_ib_responses)}"))
display(inconsistent_ib_responses)

# Respondents who gave both options the same approval ("no preference"), and
# how their rankings split between the two.
no_ib_preference = ib_responses[ib_approval == inb_approval]
display(Markdown(f"Number of people with no preference: {len(no_ib_preference)}"))
display(no_ib_preference)
# NOTE: replacement fields spanning several lines inside a single-quoted
# f-string require Python 3.12 or newer.
display(Markdown(f"Number of people with no preference who ranked IB over INB: {
    len(ib_responses[(ib_approval == inb_approval) & (ib_rank < inb_rank)])
}"))
display(Markdown(f"Number of people with no preference who ranked INB over IB: {
    len(ib_responses[(ib_approval == inb_approval) & (inb_rank < ib_rank)])
}"))

Survey Results

Raw Data

Ranked-choice results

Approval results

Commentary

Appendix

implicit-builder's next choice

Validate consistency of responses

Comments for posterity

	count
option
Loading ITables v2.3.0 from the `init_notebook_mode` cell... (need help?)

Comments from survey responses
IMO, the ideal solution would be `extensible-th` as the most general solution, but we would also have a standard interpolator to be implemented as `implicit-no-builder` in some standard module, so that if one uses that interpolator, TH would actually not be needed for compilation, so that a weaker form of string interpolation is still possible even when TH isn't available. This would also encourage the ecosystem to implement the non-TH interpolation instances where possible.
Why doesn't extensible-hasclass variant uses builder like implicit-builder one?
Implicit with builder is the most ergonomic. Less code and easy to read.
I don't understand the HasClass approach
No mandatory TemplateHaskell please, at least until its support (performance, cross-compilation etc.) is improved.
Thanks for doing this!
In the proposal you refer to Python's f-strings, but the proposed feature is more like Python's new t-strings. It would be good to relate to that.
I appreciate your use of rank AND (dis)approval voting. String interpolation isn't usually a big clarity gain for me, so I think that to be useful at all this should either be as powerful as possible (TH) or clear and foolproof as possible (explicit, the risk of being useless).
There are tons of options using TH / QuasiQuoters already. The one thing that stands out from a built-in is - no TH!
I think that any solution that does not allow any possibly-qualified identifier as a delimiter is rather pointless. The real power here is being able to define interpolators similar to how Scala has it. I'm not really confident however that including template haskell would be ideal as it severely hampers cross-compilation. For me this is a really wanted feature, so I would hope to be able to use it without it needing serious consideration on future scalability of my project.
to support sql-like interpolation more options could or should support failure states
If the string prefix takes syntax from function application, my feeling is that it should be implementable as an application of that function to the desugaring of the string. I’d be content with module qualification of string literals (and number literals) in the same style as ‘QualifiedDo’, e.g. ‘import Data.String.Interpolated qualified as S; example = … S."a ${x} b" …’.
I think implicit builder is by far the most promising option. The main concern appears to be error messages and ambiguous types, but I have strong opinions about this. The first among which is that I believe the concern about error messages is a case of the "XY" Problem, as it's been called recently. If the problem is bad error messages, then the problem is bad error messages, not necessarily the construct that led to them. I saw an issue raised in the github issues section with almost exactly this complaint, with someone saying they were unhappy due to a complicated error message arising from a type ambiguity, but I believe firmly that this should be another issue entirely. Maybe we just need special errors for ambiguous string interpolations, something that is absolutely possible and can be added in a future revision. Further, I think that the ambiguity issue is less of a problem than ever now that exportable named defaults have been (finally!) merged into GHC. I think it would be very elegant for packages which define instances for the implicit builder class to also export named defaults for classes like IsString and Buildable. I honestly view this as more or less the best of all possible worlds. Users get good behavior, by default, backed by a powerful interface, but have the opportunity to override it should their needs differ. implicit-no-builder is the obvious second choice. More or less the same functionality, if not slightly less elegant. I ranked extensible-th close to last on the ranking scale, but I would not be unhappy with it. I think it is just more unrealistic to implement, as the complaints about cross platform TH are already deep enough that we should probably try not to further the burden, as the WASM and JS backends continue to grow in popularity. Being powerful enough to implement all other modes, it also inherits their faults. 
If the error messages are the biggest concern with implicit builder, surely the TH mode would inherit that issue and make it even worse. I think implicit-only-string and extensible-hasclass are probably nonstarters. I don't think we should be trading power for simplicity in this case, I personally think we should lean into the power of GHC and its new features and embrace them. I understand and empathize with the argument against this. Haskell is already a complicated language and newcomers often find themselves overwhelmed, do we really want to make something like string interpolation, a feature supported in every other modern language with no issues, even harder for them? But I think if we take a step back and look at the interface, it's really not that bad. It does invoke some slightly more advanced concepts, sure, you have multi parameter typeclasses and an injectivity annotation if you look at the implementation, sure. But if it just works 99% of the time, and you can always fall back to explicitly constructing your string type without interpolation, is it really worth that sacrifice? I don't think so personally. I wouldn't fault people who do, but I really just don't think it's that impenetrable.
The explicit one is simultaneously very powerful and has the least magic. The user is responsible for making sure that the interpolated value has the right type. Maybe a variant of explicit where the interpolated value has fromString applied to it to cover String->Text cases would be nice, but I haven't thought the consequences of that idea.
is it possible to not require `:: String` in `print (s"Hello ${name}! Your age is ${age}" :: String)`?
I have not had the opportunity to use these directly, so my voting is based purely on the descriptions. I think extensibility is very important, and my only concern is usability e.g. type inference, error messages.
I want this to behaves exactly as string literal. So "explicit", so when `OverloadedStrings` will be revamped, it will benefit from this. I don't want to have to handle `instance` errors, hence why I don't like the implicit/implicit builder/... But that's still fine. Regarding extensible-hasclass: great design, but I'm afraid it is too complex and will lead to errors and frustrations. Regarding extensible-th: if you want the full power of TH, use quasiquotes (we can however find a way to improve quasiquotes). Everything (and more) that extensible-th proposes can already be done in external libraries (such as PyF) as template haskell, or using source plugin, so I don't see the point of introducing something complex, possibly confusing, which is less powerful than what is already achieved since years.
Do not use TH!
String interpolation should not be only limited to String type. Template Haskell is slow and is a pain to work with in code formatters and cross-compilation so we should move away from it as much as possible.
Keep it simple.
I think it's good to solicit feedback like this on a proposal but I would prefer it if you decided yourself what the best option was. You have invested many hours of effort into thinking about this, so you should trust what you think is best.
The simplest please, less extensions, this excludes TH and favors explicit and implicit-only-string
I hope we get an interface that is opaque enough to swap the backends later for more performance without breaking interface. Class-based interfaces seem to leak too much into use-sites. TH is bad but if we can have a PureTH that cannot call Haskell code, that would be a great backend for compile-time interpolation without the risk of executing platform-specific code.
Moritz has convinced me several times that TH itself is a mistake, so prefer avoiding it. I think sophisticated structures should be using eDSL combinators, not strings. So I favor implicity-only-string to confine fancier strings to "String"s. I could see support for IsString, I guess. The differently named heralds is cute, but excessive flexibility IMO: something more complicated than a sequence should be built using appropriate combinators, not via strings, interpolated or not. extensible-hasclass looks insufficient to me because different parts of, say, an SQL query really deserve different classes (expressions like in your example, grouping stuff, table names, etc).
One of the use-cases given in examples is SQL. This is a tricky example, hard (though certainly possible) to get right, but, even if done right, by hiding the fact there's stuff happening between the scenes it will encourage bad things like "where myname ='"<>name<>"'" without interpolation, and that's a path to disaster. And using TH for string interpolation? We already have it (QuasiQuoter). And it's a huge overkill for just string interpolation.
I believe TemplateHaskell is something the community should avoid for now because of the issues plaguing it with regards to portability across different systems (I do not mean across Haskell compilers), and due to the awkwardness of that interface. I largely prefer implicit-no-builder (it's simpler than implicit-builder, and seems to provide better error messages), but it's a little irritating that there's no other interpolation function besides `s`. I like extensible-hasclass, but I would only be ""Really Happy"" with it if the error messages can be made to be as good as implicit-no-builder. But, I'm also a little unhappy with the ability to add custom string interpolation capability unless they're well documented, because I feel like they could be easy to miss.
Watching the Python community reinvent the string formatting wheel again with t-strings makes me think it's worth letting this cook. We should either do something extremely simple or cook it extremely thoroughly in a middleweight library that real projects can use. https://discourse.haskell.org/t/ghc-string-interpolation-final-survey/11895/9 seems really important. We really need to see these trialed in nontrivial codebases to know what the type inference situation etc is going to be like. In particular, MPTCs with no fundeps are a magnet for manual annotations and ambiguity errors. extensible-th should provide a TH function to break the string up for interpolation - having a function application against a string desugar into a bunch of TH could be pretty surprising. It is critical to me that whatever interpolation scheme is proposed does not default through the `Show` instance of types it's interpolating. I have had so many log files include unreadable piles of `show` output and/or sensitive data because the interpolation library merrily compiled something it shouldn't've.
I am against everything that results in creation of values of type [Char] at runtime. Text is not the future, it is today and String should be gone. Also, I am generally against adding built-in string interpolation syntax. In my opinion Template Haskell should be fixed and then we could create libraries based on that and see which ones gain popularity. I also believe that if there is a built-in string interpolation syntax, there needs to be a way to do string interpolation at run time. Message translation needs run-time interpolation and language should support it rather than making it more difficult (by giving programmers interpolation tools which are not available at run time, when message selection happens). In principal I support maximum extensibility and customization. If I was to design this kind of extension, I would write a library that would parse the template strings, format interpolated values according to the template (the template should support format control) and build a Text value out of all this. The user should be able to add type-specific format control directives. Interpolation could be done using Template Haskell, if done at compile time. If done at run time, the user would need to supply a map from keys to values. But... good type errors are a must. Therefore a multiparam type class without functional dependencies is probably a bad idea. The system should also adapt to situations where the concrete type of an interpolated value is not known. (The value might be accessed through an existentially quantified data type, and could be an instance of a specific type class.) Even though I am against the proposed extension (at this point), I really appreciate the discusiion and the initiative. Thank you for putting very much effort into this.